当前位置: 首页>>代码示例>>Python>>正文


Python pandas.read_stata方法代码示例

本文整理汇总了 Python 中 pandas.read_stata 方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.read_stata 方法的具体用法？Python pandas.read_stata 怎么用？Python pandas.read_stata 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas 的用法示例。


在下文中一共展示了 pandas.read_stata 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_encoding

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_encoding(self, version):
        """Check reading and writing with the ``encoding`` keyword (GH 4626).

        The file is read once without and once with ``encoding='latin-1'``;
        the first ``kreis1849`` value must match and be a string. The encoded
        frame is then written out and read back unchanged.
        """
        source = self.dta_encoding
        plain = read_stata(source)
        # Passing ``encoding`` to the reader must emit a FutureWarning.
        with tm.assert_produces_warning(FutureWarning):
            encoded = read_stata(source, encoding='latin-1')

        first_encoded = encoded.kreis1849[0]
        first_plain = plain.kreis1849[0]
        assert first_encoded == first_plain
        assert isinstance(first_encoded, compat.string_types)

        with tm.ensure_clean() as path:
            # The writer must warn about ``encoding`` as well.
            with tm.assert_produces_warning(FutureWarning):
                encoded.to_stata(path, encoding='latin-1', version=version,
                                 write_index=False)
            round_tripped = read_stata(path)
            tm.assert_frame_equal(encoded, round_tripped)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:20,代码来源:test_stata.py

示例2: test_dtype_conversion

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_dtype_conversion(self):
        """Compare ``read_stata`` output with and without ``preserve_dtypes``.

        With the default settings the result must equal the CSV frame after
        explicit narrowing casts; with ``preserve_dtypes=False`` it must equal
        the CSV frame as read (only the date column parsed).
        """
        def parse_dates(frame):
            # Turn the 'date_td' strings into datetime objects.
            frame['date_td'] = frame['date_td'].apply(datetime.strptime,
                                                      args=('%Y-%m-%d',))
            return frame

        narrow_dtypes = (('byte_', np.int8),
                         ('int_', np.int16),
                         ('long_', np.int32),
                         ('float_', np.float32),
                         ('double_', np.float64))

        typed = self.read_csv(self.csv15)
        for name, dtype in narrow_dtypes:
            typed[name] = typed[name].astype(dtype)
        typed = parse_dates(typed)

        preserved = read_stata(self.dta15_117, convert_dates=True)
        tm.assert_frame_equal(typed, preserved)

        converted = read_stata(self.dta15_117, convert_dates=True,
                               preserve_dtypes=False)

        # read_csv types are the same
        untyped = parse_dates(self.read_csv(self.csv15))
        tm.assert_frame_equal(untyped, converted)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:26,代码来源:test_stata.py

示例3: test_out_of_range_float

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_out_of_range_float(self):
        """Round-trip float32 boundary values and reject infinity on write."""
        f32 = np.finfo(np.float32)
        original = DataFrame({'ColumnOk': [0.0, f32.eps, f32.max / 10.0],
                              'ColumnTooBig': [0.0, f32.eps, f32.max]})
        original.index.name = 'index'
        for name in original:
            original[name] = original[name].astype(np.float32)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            reread = read_stata(path).set_index('index')
            # The frame used for comparison holds 'ColumnTooBig' as float64.
            widened = original['ColumnTooBig'].astype(np.float64)
            original['ColumnTooBig'] = widened
            tm.assert_frame_equal(original, reread)

        # An infinite value must make the writer raise ValueError.
        original.loc[2, 'ColumnTooBig'] = np.inf
        msg = ("Column ColumnTooBig has a maximum value of infinity which"
               " is outside the range supported by Stata")
        with pytest.raises(ValueError, match=msg), \
                tm.ensure_clean() as path:
            original.to_stata(path)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:27,代码来源:test_stata.py

示例4: test_date_parsing_ignores_format_details

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_date_parsing_ignores_format_details(self, column):
        # GH 17797
        #
        # Display formats must not influence whether a numeric column is
        # recognised as a date value.
        #
        # Every date type is stored as a number; the format attached to the
        # column encodes both the kind of date and how it is displayed.
        #
        # STATA has 9 date types, each with its own unit. 7 of the 9 are
        # tested here; %tC and %tb are skipped. %tC is the %tc variant that
        # accounts for leap seconds, and %tb depends on STATA's business
        # calendar.
        frame = read_stata(self.stata_dates)
        plain_value = frame.at[0, column]
        formatted_value = frame.at[0, column + "_fmt"]
        assert plain_value == formatted_value
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_stata.py

示例5: test_mixed_string_strl

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_mixed_string_strl(self):
        """Round-trip a column mixing a long string with ``None`` (GH 23633).

        In both round trips the frame read back must equal the written frame
        with missing values replaced by empty strings.
        """
        records = [{'mixed': 'string' * 500, 'number': 0},
                   {'mixed': None, 'number': 1}]
        output = pd.DataFrame(records)
        output['number'] = output['number'].astype('int32')

        with tm.ensure_clean() as path:
            output.to_stata(path, version=117, write_index=False)
            tm.assert_frame_equal(read_stata(path), output.fillna(''))

            # Check strl supports all None (null)
            output.loc[:, 'mixed'] = None
            output.to_stata(path, version=117, write_index=False,
                            convert_strl=['mixed'])
            tm.assert_frame_equal(read_stata(path), output.fillna(''))
开发者ID:Frank-qlu,项目名称:recruit,代码行数:26,代码来源:test_stata.py

示例6: setup_class

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def setup_class(cls, true, order, trend, error_cov_type, cov_type='approx',
                 **kwargs):
        """Load the ``manufac.dta`` fixture and smooth a VARMAX model on it.

        Parameters
        ----------
        true : mapping
            Stored on ``cls.true``; its ``'params'`` entry is passed to
            ``smooth``.
        order, trend, error_cov_type
            Forwarded to ``varmax.VARMAX``.
        cov_type : str, default 'approx'
            Forwarded to ``smooth``.
        **kwargs
            Extra keyword arguments forwarded to ``varmax.VARMAX``.
        """
        cls.true = true
        # 1960:Q1 - 1982:Q4
        # NOTE(review): the index built below uses a monthly ('MS')
        # frequency -- confirm the span stated above.
        # os.path.join avoids the malformed path that plain concatenation
        # produces when ``current_path`` is empty or ends with a separator.
        data_file = os.path.join(current_path, 'results', 'manufac.dta')
        with open(data_file, 'rb') as test_data:
            dta = pd.read_stata(test_data)
        dta.index = pd.DatetimeIndex(dta.month, freq='MS')
        dta['dlncaputil'] = dta['lncaputil'].diff()
        dta['dlnhours'] = dta['lnhours'].diff()

        endog = dta.loc['1972-02-01':, ['dlncaputil', 'dlnhours']]

        # Warnings raised during model construction are captured and
        # discarded; the recorded list itself is not needed.
        with warnings.catch_warnings(record=True):
            warnings.simplefilter('always')
            cls.model = varmax.VARMAX(endog, order=order, trend=trend,
                                       error_cov_type=error_cov_type, **kwargs)

        cls.results = cls.model.smooth(true['params'], cov_type=cov_type)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:20,代码来源:test_varmax.py

示例7: test_encoding

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_encoding(self, version):
        """Check reading and writing with an explicit encoding (GH 4626)."""
        raw = read_stata(self.dta_encoding)
        encoded = read_stata(self.dta_encoding, encoding="latin-1")
        result = encoded.kreis1849[0]

        # On Python 2 the raw column is decoded before comparing and the
        # result must be ``unicode``; on Python 3 the raw value is used as is.
        if not compat.PY3:
            expected = raw.kreis1849.str.decode("latin-1")[0]
            text_type = unicode  # noqa
        else:
            expected = raw.kreis1849[0]
            text_type = compat.string_types
        assert result == expected
        assert isinstance(result, text_type)

        with tm.ensure_clean() as path:
            encoded.to_stata(path, write_index=False, version=version,
                             encoding='latin-1')
            round_tripped = read_stata(path, encoding='latin-1')
            tm.assert_frame_equal(encoded, round_tripped)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:23,代码来源:test_stata.py

示例8: test_out_of_range_float

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_out_of_range_float(self):
        """Round-trip float32 boundary values and reject infinity on write.

        The frame is written and read back, then compared with
        'ColumnTooBig' cast to float64. Afterwards an infinite value is
        inserted and the writer must raise ValueError naming the column.
        """
        original = DataFrame({'ColumnOk': [0.0,
                                           np.finfo(np.float32).eps,
                                           np.finfo(np.float32).max / 10.0],
                              'ColumnTooBig': [0.0,
                                               np.finfo(np.float32).eps,
                                               np.finfo(np.float32).max]})
        original.index.name = 'index'
        for col in original:
            original[col] = original[col].astype(np.float32)

        with tm.ensure_clean() as path:
            original.to_stata(path)
            reread = read_stata(path)
            # The frame used for comparison holds 'ColumnTooBig' as float64.
            original['ColumnTooBig'] = original['ColumnTooBig'].astype(
                np.float64)
            tm.assert_frame_equal(original,
                                  reread.set_index('index'))

        original.loc[2, 'ColumnTooBig'] = np.inf
        with pytest.raises(ValueError) as cm:
            with tm.ensure_clean() as path:
                original.to_stata(path)
        # The message checks must run after the ``raises`` block exits:
        # statements placed after the raising call inside the block are never
        # executed. pytest exposes the caught exception as ``cm.value``.
        message = str(cm.value)
        assert 'ColumnTooBig' in message
        assert 'infinity' in message

示例9: read_stata

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def read_stata(filepath_or_buffer, convert_dates=True,
               convert_categoricals=True, encoding=None, index_col=None,
               convert_missing=False, preserve_dtypes=True, columns=None,
               order_categoricals=True, chunksize=None, iterator=False):
    """Read a Stata file through ``StataReader``.

    All arguments except ``iterator`` are forwarded to the ``StataReader``
    constructor. When ``iterator`` or ``chunksize`` is truthy the open
    reader itself is returned; otherwise the result of ``reader.read()`` is
    returned and the reader is closed, even if reading raises.
    """
    reader_options = dict(
        convert_dates=convert_dates,
        convert_categoricals=convert_categoricals,
        index_col=index_col,
        convert_missing=convert_missing,
        preserve_dtypes=preserve_dtypes,
        columns=columns,
        order_categoricals=order_categoricals,
        chunksize=chunksize,
        encoding=encoding,
    )
    reader = StataReader(filepath_or_buffer, **reader_options)

    # Incremental reading: hand the still-open reader back to the caller.
    if iterator or chunksize:
        return reader

    try:
        return reader.read()
    finally:
        reader.close()
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:24,代码来源:stata.py

示例10: setup_class

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def setup_class(cls):
        """Run the LIML ``ivreg`` on ``auto.dta`` and store the expected values.

        Original note: Stata reg output from `sysuse auto; reg price mpg`
        """
        cls.init(cls)
        # The data file sits in the 'data' directory beside this test module.
        test_dir, _ = path.split(path.relpath(__file__))
        autodata = pd.read_stata(path.join(test_dir, 'data', 'auto.dta'))
        outcome = 'price'
        endogenous = ['mpg', 'length']
        instruments = ['trunk', 'weight', 'headroom']
        exogenous = []
        cls.result = ivreg(autodata, outcome, endogenous, instruments,
                           exogenous,
                           addcons=True,
                           iv_method='liml',
                           nosingles=True)
        cls.expected = liml_std
开发者ID:dmsul,项目名称:econtools,代码行数:18,代码来源:test_liml.py

示例11: parsed_114

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def parsed_114(dirpath):
    """Read ``stata5_114.dta`` from *dirpath* and name its index 'index'.

    Dates are converted while reading (``convert_dates=True``).
    """
    frame = read_stata(os.path.join(dirpath, 'stata5_114.dta'),
                       convert_dates=True)
    frame.index.name = 'index'
    return frame
开发者ID:Frank-qlu,项目名称:recruit,代码行数:7,代码来源:test_stata.py

示例12: read_dta

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def read_dta(self, file):
        """Read *file* with the legacy default reader configuration.

        That configuration is ``read_stata`` with ``convert_dates=True``.
        """
        frame = read_stata(file, convert_dates=True)
        return frame
开发者ID:Frank-qlu,项目名称:recruit,代码行数:5,代码来源:test_stata.py

示例13: test_read_empty_dta

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_read_empty_dta(self, version):
        """GH 7369: a .dta file with zero observations must be readable."""
        written = DataFrame(columns=['unit'])
        with tm.ensure_clean() as path:
            written.to_stata(path, version=version, write_index=False)
            read_back = read_stata(path)
            tm.assert_frame_equal(written, read_back)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:9,代码来源:test_stata.py

示例14: test_105

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_105(self):
        """Check the first three rows read from ``S4_EDUC1.dta``."""
        # Data obtained from:
        # http://go.worldbank.org/ZXY29PVJ21
        actual = pd.read_stata(os.path.join(self.dirpath, 'S4_EDUC1.dta'))

        expected = pd.DataFrame(
            [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]],
            columns=["clustnum", "pri_schl", "psch_num", "psch_dis"])
        # Cast each column to the dtype the comparison expects.
        column_dtypes = (('clustnum', np.int16),
                         ('pri_schl', np.int8),
                         ('psch_num', np.int8),
                         ('psch_dis', np.float32))
        for name, dtype in column_dtypes:
            expected[name] = expected[name].astype(dtype)

        tm.assert_frame_equal(actual.head(3), expected)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:15,代码来源:test_stata.py

示例15: test_drop_column

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_drop_column(self):
        """Exercise the ``columns`` argument of ``read_stata``.

        Covers selecting a subset, selecting in a different order, and the
        ValueError raised for duplicate or unknown column names.
        """
        expected = self.read_csv(self.csv15)
        narrow_dtypes = (('byte_', np.int8),
                         ('int_', np.int16),
                         ('long_', np.int32),
                         ('float_', np.float32),
                         ('double_', np.float64))
        for name, dtype in narrow_dtypes:
            expected[name] = expected[name].astype(dtype)
        expected['date_td'] = expected['date_td'].apply(datetime.strptime,
                                                        args=('%Y-%m-%d',))

        subset = ['byte_', 'int_', 'long_']
        expected = expected[subset]
        dropped = read_stata(self.dta15_117, columns=subset,
                             convert_dates=True)
        tm.assert_frame_equal(expected, dropped)

        # See PR 10757
        shuffled = ['int_', 'long_', 'byte_']
        expected = expected[shuffled]
        reordered = read_stata(self.dta15_117, columns=shuffled,
                               convert_dates=True)
        tm.assert_frame_equal(expected, reordered)

        # A repeated column name is rejected.
        duplicated = ['byte_', 'byte_']
        msg = "columns contains duplicate entries"
        with pytest.raises(ValueError, match=msg):
            read_stata(self.dta15_117, columns=duplicated,
                       convert_dates=True)

        # So is a column name absent from the data set.
        with_unknown = ['byte_', 'int_', 'long_', 'not_found']
        msg = ("The following columns were not found in the Stata data set:"
               " not_found")
        with pytest.raises(ValueError, match=msg):
            read_stata(self.dta15_117, columns=with_unknown,
                       convert_dates=True)
开发者ID:Frank-qlu,项目名称:recruit,代码行数:36,代码来源:test_stata.py


注:本文中的pandas.read_stata方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。