当前位置: 首页>>代码示例>>Python>>正文


Python TextParser.read方法代码示例

本文整理汇总了 Python 中 pandas.io.parsers.TextParser.read 方法的典型用法代码示例。如果您正苦于以下问题：Python TextParser.read 方法的具体用法？Python TextParser.read 怎么用？Python TextParser.read 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pandas.io.parsers.TextParser 的用法示例。


在下文中一共展示了 TextParser.read 方法的 11 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: _data_to_frame

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types, parse_dates, tupleize_cols, thousands):
    """Turn a ``(head, body, foot)`` triple into a frame via ``TextParser``.

    A non-empty ``head`` is prepended to ``body`` as one row, and in that
    case a ``header`` of ``None`` is replaced by ``0``.  A non-empty ``foot``
    is appended as one row.  ``infer_types`` is accepted but not used here.

    Returns whatever ``TextParser(...).read()`` produces.
    """
    head, rows, foot = data

    if head:
        rows = [head] + rows
        # special case when a table has <th> elements
        header = 0 if header is None else header

    if foot:
        rows += [foot]

    # fill out rows that are "ragged"
    _expand_elements(rows)

    parser_options = dict(
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    return TextParser(rows, **parser_options).read()
开发者ID:pardusnimr,项目名称:adelscrapper,代码行数:28,代码来源:html.py

示例2: test_read_text_list

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
    def test_read_text_list(self):
        """TextParser fed pre-split rows must match read_csv on the same text."""
        rows = [["A", "B", "C"], ["foo", "1", "2", "3"], ["bar", "4", "5", "6"]]
        text = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""

        expected = self.read_csv(StringIO(text), index_col=0)

        # Passing None to read() asks for all rows despite chunksize=2.
        result = TextParser(rows, index_col=0, chunksize=2).read(None)

        tm.assert_frame_equal(result, expected)
开发者ID:flamingbear,项目名称:pandas,代码行数:11,代码来源:common.py

示例3: test_read_text_list

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
    def test_read_text_list(self):
        """Compare TextParser on a list of rows with read_csv on the CSV text."""
        header_row = ['A', 'B', 'C']
        first_row = ['foo', '1', '2', '3']
        second_row = ['bar', '4', '5', '6']
        csv_text = """A,B,C\nfoo,1,2,3\nbar,4,5,6"""

        expected = self.read_csv(StringIO(csv_text), index_col=0)

        list_parser = TextParser([header_row, first_row, second_row],
                                 index_col=0, chunksize=2)
        actual = list_parser.read(None)

        tm.assert_frame_equal(actual, expected)
开发者ID:aFraley,项目名称:pandas,代码行数:12,代码来源:common.py

示例4: _parse_excel

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
    def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
                     index_col=None, has_index_names=None, parse_cols=None,
                     parse_dates=False, date_parser=None, na_values=None,
                     thousands=None, chunksize=None, **kwds):
        """Read one worksheet of ``self.book`` and parse it with ``TextParser``.

        ``sheetname`` is looked up by name when it is a string and by index
        otherwise.  When ``parse_cols`` is given, each column is kept or
        dropped according to ``self._should_parse``.  Date, error and boolean
        cells are converted before parsing.  The remaining keyword arguments
        are forwarded to ``TextParser``.

        Returns the result of ``TextParser(...).read()``.
        """
        from xlrd import (xldate_as_tuple, XL_CELL_DATE,
                          XL_CELL_ERROR, XL_CELL_BOOLEAN)

        datemode = self.book.datemode

        def _convert(cell_value, cell_type):
            # Translate the xlrd cell types that need special handling;
            # everything else is passed through untouched.
            if cell_type == XL_CELL_DATE:
                parts = xldate_as_tuple(cell_value, datemode)
                # how to produce this first case?
                if parts[0] < datetime.MINYEAR:  # pragma: no cover
                    return datetime.time(*parts[3:])
                return datetime.datetime(*parts)
            if cell_type == XL_CELL_ERROR:
                return np.nan
            if cell_type == XL_CELL_BOOLEAN:
                return bool(cell_value)
            return cell_value

        if isinstance(sheetname, compat.string_types):
            sheet = self.book.sheet_by_name(sheetname)
        else:  # assume an integer if not a string
            sheet = self.book.sheet_by_index(sheetname)

        filter_cols = parse_cols is not None
        keep_col = {}  # column index -> cached self._should_parse answer
        data = []
        for row_idx in range(sheet.nrows):
            cells = zip(sheet.row_values(row_idx), sheet.row_types(row_idx))
            row = []
            for col_idx, (value, typ) in enumerate(cells):
                if filter_cols:
                    if col_idx not in keep_col:
                        keep_col[col_idx] = self._should_parse(col_idx,
                                                               parse_cols)
                    if not keep_col[col_idx]:
                        continue
                row.append(_convert(value, typ))
            data.append(row)

        if header is not None:
            data[header] = _trim_excel_header(data[header])

        return TextParser(
            data,
            header=header,
            index_col=index_col,
            has_index_names=has_index_names,
            na_values=na_values,
            thousands=thousands,
            parse_dates=parse_dates,
            date_parser=date_parser,
            skiprows=skiprows,
            skip_footer=skip_footer,
            chunksize=chunksize,
            **kwds
        ).read()
开发者ID:jtornero,项目名称:pandas,代码行数:55,代码来源:excel.py

示例5: _data_to_frame

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(**kwargs):
    """Build a frame from ``kwargs['data']``, a ``(head, body, foot)`` triple.

    ``data`` and ``header`` are removed from ``kwargs``; ``skiprows`` is
    normalised through ``_get_skiprows``.  The remaining keyword arguments are
    forwarded to ``TextParser``.  A non-empty ``head`` (a list of rows) is put
    in front of ``body``; a non-empty ``foot`` is appended as a single row.

    Returns the result of ``TextParser(...).read()``.
    """
    head, table_rows, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        head_positions = lrange(len(head))
        table_rows = head + table_rows
        # special case when a table has <th> elements: a single head row
        # becomes header 0, several head rows become a list of positions
        if header is None:
            if head_positions == [0]:
                header = 0
            else:
                header = head_positions

    if foot:
        table_rows += [foot]

    # fill out rows that are "ragged"
    _expand_elements(table_rows)
    return TextParser(table_rows, header=header, **kwargs).read()
开发者ID:AllenDowney,项目名称:pandas,代码行数:20,代码来源:html.py

示例6: _data_to_frame

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Parse the head and body rows of ``data`` into a frame.

    ``data`` is a three-item sequence; its last item (the footer) is ignored.
    A non-empty head is prepended to the body as one row, and in that case a
    ``header`` of ``None`` is replaced by ``0``.  ``infer_types`` is accepted
    but not used here.

    Returns the result of ``TextParser(...).read()``.
    """
    # The last item is the footer, which is rarely used: ignore for now.
    head, rows, _ = data

    if head:
        rows = [head] + rows
        if header is None:
            # special case when a table has <th> elements
            header = 0

    # fill out rows that are "ragged"
    _expand_elements(rows)

    parser = TextParser(
        rows,
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    )
    return parser.read()
开发者ID:BorisVerk,项目名称:pandas,代码行数:21,代码来源:html.py

示例7: _data_to_frame

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(**kwargs):
    """Build a frame from ``kwargs['data']``, a ``(head, body, foot)`` triple.

    ``data`` and ``header`` are removed from ``kwargs``; ``skiprows`` is
    normalised through ``_get_skiprows``.  The remaining keyword arguments are
    forwarded to ``TextParser``.  ``head`` and ``foot`` are lists of rows that
    are concatenated before and after ``body``.

    When ``header`` is ``None`` and ``head`` is non-empty, the header is
    inferred: ``0`` for a single head row, otherwise the positions of the head
    rows that contain at least one truthy cell.

    Returns the result of ``TextParser(...).read()``.
    """
    head, table_rows, foot = kwargs.pop('data')
    header = kwargs.pop('header')
    kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])

    if head:
        table_rows = head + table_rows

    # Infer header when there is a <thead> or top <th>-only rows
    if head and header is None:
        if len(head) == 1:
            header = 0
        else:
            # ignore all-empty-text rows
            header = []
            for position, head_row in enumerate(head):
                if any(head_row):
                    header.append(position)

    if foot:
        table_rows += foot

    # fill out rows that are "ragged"
    _expand_elements(table_rows)
    return TextParser(table_rows, header=header, **kwargs).read()
开发者ID:TomAugspurger,项目名称:pandas,代码行数:26,代码来源:html.py

示例8: _data_to_frame

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
def _data_to_frame(data, header, index_col, skiprows, infer_types,
                   parse_dates, tupleize_cols, thousands):
    """Parse the head and body rows of ``data`` into a frame.

    ``data`` is a three-item sequence; its last item (the footer) is ignored.
    A non-empty head is prepended to the body as one row, and in that case a
    ``header`` of ``None`` is replaced by ``0``.

    After parsing, a truthy ``infer_types`` runs
    ``convert_objects(convert_dates='coerce')`` on the frame; otherwise every
    cell is mapped through ``text_type``.
    """
    # The last item is the footer, which is rarely used: ignore for now.
    head, rows, _ = data

    if head:
        rows = [head] + rows
        if header is None:
            # special case when a table has <th> elements
            header = 0

    # fill out rows that are "ragged"
    _expand_elements(rows)

    frame = TextParser(
        rows,
        header=header,
        index_col=index_col,
        skiprows=_get_skiprows(skiprows),
        parse_dates=parse_dates,
        tupleize_cols=tupleize_cols,
        thousands=thousands,
    ).read()

    if not infer_types:
        return frame.applymap(text_type)

    # TODO: rm this code so infer_types has no effect in 0.14
    return frame.convert_objects(convert_dates='coerce')
开发者ID:ArbiterGames,项目名称:BasicPythonLinearRegression,代码行数:26,代码来源:html.py

示例9: _parse_excel

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]
    def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
                     index_col=None, has_index_names=None, parse_cols=None,
                     parse_dates=False, date_parser=None, na_values=None,
                     thousands=None, chunksize=None, convert_float=True,
                     **kwds):
        """Read one worksheet of ``self.book`` and parse it with ``TextParser``.

        ``sheetname`` is looked up by name when it is a string and by index
        otherwise.  When ``parse_cols`` is given, each column is kept or
        dropped according to ``self._should_parse``.  Date, error and boolean
        cells are converted before parsing.  With ``convert_float`` true,
        number cells whose value equals its integer truncation are stored as
        ``int``.  The remaining keyword arguments are forwarded to
        ``TextParser``.

        Returns the result of ``TextParser(...).read()``.
        """
        import xlrd
        from xlrd import (xldate, XL_CELL_DATE,
                          XL_CELL_ERROR, XL_CELL_BOOLEAN,
                          XL_CELL_NUMBER)

        epoch1904 = self.book.datemode

        # xlrd >= 0.9.3 can return datetime objects directly.
        xlrd_0_9_3 = LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3")

        def _convert_date(raw):
            if not xlrd_0_9_3:
                # Use the xlrd <= 0.9.2 date handling.
                parts = xldate.xldate_as_tuple(raw, epoch1904)
                if parts[0] < datetime.MINYEAR:
                    return datetime.time(*parts[3:])
                return datetime.datetime(*parts)

            # Use the newer xlrd datetime handling.
            stamp = xldate.xldate_as_datetime(raw, epoch1904)

            # Excel doesn't distinguish between dates and time,
            # so we treat dates on the epoch as times only.
            # Also, Excel supports 1900 and 1904 epochs.
            epoch_day = (1904, 1, 1) if epoch1904 else (1899, 12, 31)
            if stamp.timetuple()[0:3] == epoch_day:
                return datetime.time(stamp.hour, stamp.minute,
                                     stamp.second, stamp.microsecond)
            return stamp

        def _convert(raw, cell_type):
            # Cell types not listed here are passed through untouched.
            if cell_type == XL_CELL_DATE:
                return _convert_date(raw)
            if cell_type == XL_CELL_ERROR:
                return np.nan
            if cell_type == XL_CELL_BOOLEAN:
                return bool(raw)
            if convert_float and cell_type == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                as_int = int(raw)
                if as_int == raw:
                    return as_int
            return raw

        if isinstance(sheetname, compat.string_types):
            sheet = self.book.sheet_by_name(sheetname)
        else:  # assume an integer if not a string
            sheet = self.book.sheet_by_index(sheetname)

        filter_cols = parse_cols is not None
        keep_col = {}  # column index -> cached self._should_parse answer
        data = []
        for row_idx in range(sheet.nrows):
            cells = zip(sheet.row_values(row_idx), sheet.row_types(row_idx))
            row = []
            for col_idx, (value, typ) in enumerate(cells):
                if filter_cols:
                    if col_idx not in keep_col:
                        keep_col[col_idx] = self._should_parse(col_idx,
                                                               parse_cols)
                    if not keep_col[col_idx]:
                        continue
                row.append(_convert(value, typ))
            data.append(row)

        if header is not None:
            data[header] = _trim_excel_header(data[header])

        return TextParser(
            data,
            header=header,
            index_col=index_col,
            has_index_names=has_index_names,
            na_values=na_values,
            thousands=thousands,
            parse_dates=parse_dates,
            date_parser=date_parser,
            skiprows=skiprows,
            skip_footer=skip_footer,
            chunksize=chunksize,
            **kwds
        ).read()
开发者ID:Autodidact24,项目名称:pandas,代码行数:89,代码来源:excel.py

示例10: _parse_excel

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]

#.........这里部分代码省略.........
                    cell_contents = xldate.xldate_as_datetime(cell_contents,
                                                              epoch1904)

                    # Excel doesn't distinguish between dates and time,
                    # so we treat dates on the epoch as times only.
                    # Also, Excel supports 1900 and 1904 epochs.
                    year = (cell_contents.timetuple())[0:3]
                    if ((not epoch1904 and year == (1899, 12, 31))
                            or (epoch1904 and year == (1904, 1, 1))):
                        cell_contents = datetime.time(cell_contents.hour,
                                              cell_contents.minute,
                                              cell_contents.second,
                                              cell_contents.microsecond)
                else:
                    # Use the xlrd <= 0.9.2 date handling.
                    dt = xldate.xldate_as_tuple(cell_contents, epoch1904)

                    if dt[0] < datetime.MINYEAR:
                        cell_contents = datetime.time(*dt[3:])
                    else:
                        cell_contents = datetime.datetime(*dt)

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif convert_float and cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less suprising
                val = int(cell_contents)
                if val == cell_contents:
                    cell_contents = val
            return cell_contents

        # xlrd >= 0.9.3 can return datetime objects directly.
        if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"):
            xlrd_0_9_3 = True
        else:
            xlrd_0_9_3 = False
        
        ret_dict = False
        
        #Keep sheetname to maintain backwards compatibility.
        if isinstance(sheetname, list):
            sheets = sheetname
            ret_dict = True
        elif sheetname is None:
            sheets = self.sheet_names
            ret_dict = True
        else:
            sheets = [sheetname]
        
        #handle same-type duplicates.
        sheets = list(set(sheets))
        
        output = {}
        
        for asheetname in sheets:
            if verbose:
                print("Reading sheet %s" % asheetname)
            
            if isinstance(asheetname, compat.string_types):
                sheet = self.book.sheet_by_name(asheetname)
            else:  # assume an integer if not a string    
                sheet = self.book.sheet_by_index(asheetname)   
            
            data = []
            should_parse = {}
            
            for i in range(sheet.nrows):
                row = []
                for j, (value, typ) in enumerate(zip(sheet.row_values(i),
                                                     sheet.row_types(i))):
                    if parse_cols is not None and j not in should_parse:
                        should_parse[j] = self._should_parse(j, parse_cols)
    
                    if parse_cols is None or should_parse[j]:
                        row.append(_parse_cell(value,typ))
                data.append(row)
    
            if header is not None:
                data[header] = _trim_excel_header(data[header])
    
            parser = TextParser(data, header=header, index_col=index_col,
                                has_index_names=has_index_names,
                                na_values=na_values,
                                thousands=thousands,
                                parse_dates=parse_dates,
                                date_parser=date_parser,
                                skiprows=skiprows,
                                skip_footer=skip_footer,
                                chunksize=chunksize,
                                **kwds)
            
            output[asheetname] = parser.read()
            
        if ret_dict:
            return output
        else:
            return output[asheetname]
开发者ID:legolin,项目名称:pandas,代码行数:104,代码来源:excel.py

示例11: parse

# 需要导入模块: from pandas.io.parsers import TextParser [as 别名]
# 或者: from pandas.io.parsers.TextParser import read [as 别名]

#.........这里部分代码省略.........
        output = OrderedDict()

        for asheetname in sheets:
            if verbose:
                print("Reading sheet {sheet}".format(sheet=asheetname))

            if isinstance(asheetname, compat.string_types):
                sheet = self.get_sheet_by_name(asheetname)
            else:  # assume an integer if not a string
                sheet = self.get_sheet_by_index(asheetname)

            data = self.get_sheet_data(sheet, convert_float)
            usecols = _maybe_convert_usecols(usecols)

            if sheet.nrows == 0:
                output[asheetname] = DataFrame()
                continue

            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for row in header:
                    if is_integer(skiprows):
                        row += skiprows

                    data[row], control_row = _fill_mi_header(data[row],
                                                             control_row)

                    if index_col is not None:
                        header_name, _ = _pop_header_name(data[row], index_col)
                        header_names.append(header_name)

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if not is_list_like(header):
                    offset = 1 + header
                else:
                    offset = 1 + max(header)

                # Check if we have an empty dataset
                # before trying to collect data.
                if offset < len(data):
                    for col in index_col:
                        last = data[offset][col]

                        for row in range(offset + 1, len(data)):
                            if data[row][col] == '' or data[row][col] is None:
                                data[row][col] = last
                            else:
                                last = data[row][col]

            has_index_names = is_list_like(header) and len(header) > 1

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(data,
                                    names=names,
                                    header=header,
                                    index_col=index_col,
                                    has_index_names=has_index_names,
                                    squeeze=squeeze,
                                    dtype=dtype,
                                    true_values=true_values,
                                    false_values=false_values,
                                    skiprows=skiprows,
                                    nrows=nrows,
                                    na_values=na_values,
                                    parse_dates=parse_dates,
                                    date_parser=date_parser,
                                    thousands=thousands,
                                    comment=comment,
                                    skipfooter=skipfooter,
                                    usecols=usecols,
                                    mangle_dupe_cols=mangle_dupe_cols,
                                    **kwds)

                output[asheetname] = parser.read(nrows=nrows)

                if not squeeze or isinstance(output[asheetname], DataFrame):
                    if header_names:
                        output[asheetname].columns = output[
                            asheetname].columns.set_names(header_names)
                    elif compat.PY2:
                        output[asheetname].columns = _maybe_convert_to_string(
                            output[asheetname].columns)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[asheetname] = DataFrame()

        if ret_dict:
            return output
        else:
            return output[asheetname]
开发者ID:josham,项目名称:pandas,代码行数:104,代码来源:_base.py


注:本文中的pandas.io.parsers.TextParser.read方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。