本文整理汇总了 Python 中 pandas.read_stata 方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_stata 方法的具体用法?Python pandas.read_stata 怎么用?Python pandas.read_stata 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas 的用法示例。
在下文中一共展示了 pandas.read_stata 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_encoding
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_encoding(self, version):
    """Check the latin-1 fixture reads identically with and without ``encoding``.

    Passing ``encoding='latin-1'`` to ``read_stata`` and ``to_stata`` is
    expected to raise a ``FutureWarning``. The frame read with the keyword
    is then written to a temporary file and read back without it, and the
    two frames must be equal.
    """
    # GH 4626, proper encoding handling
    plain = read_stata(self.dta_encoding)
    with tm.assert_produces_warning(FutureWarning):
        latin = read_stata(self.dta_encoding, encoding='latin-1')

    first_value = latin.kreis1849[0]
    reference_value = plain.kreis1849[0]
    assert first_value == reference_value
    assert isinstance(first_value, compat.string_types)

    with tm.ensure_clean() as tmp_path:
        # Only the write passes ``encoding``, so only the write must warn.
        with tm.assert_produces_warning(FutureWarning):
            latin.to_stata(tmp_path, write_index=False, version=version,
                           encoding='latin-1')
        round_tripped = read_stata(tmp_path)
        tm.assert_frame_equal(latin, round_tripped)
示例2: test_dtype_conversion
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_dtype_conversion(self):
    """Compare ``read_stata`` output with and without ``preserve_dtypes``.

    Both reads of ``self.dta15_117`` are checked against the CSV fixture
    ``self.csv15``. The default read is compared with a frame whose numeric
    columns are cast to explicit NumPy dtypes; the ``preserve_dtypes=False``
    read is compared with the CSV frame as loaded (dates parsed only).
    """
    column_dtypes = (
        ('byte_', np.int8),
        ('int_', np.int16),
        ('long_', np.int32),
        ('float_', np.float32),
        ('double_', np.float64),
    )
    date_args = ('%Y-%m-%d',)

    typed = self.read_csv(self.csv15)
    for name, dtype in column_dtypes:
        typed[name] = typed[name].astype(dtype)
    typed['date_td'] = typed['date_td'].apply(datetime.strptime,
                                              args=date_args)
    preserved = read_stata(self.dta15_117, convert_dates=True)
    tm.assert_frame_equal(typed, preserved)

    widened = read_stata(self.dta15_117, convert_dates=True,
                         preserve_dtypes=False)
    # read_csv types are the same
    untyped = self.read_csv(self.csv15)
    untyped['date_td'] = untyped['date_td'].apply(datetime.strptime,
                                                  args=date_args)
    tm.assert_frame_equal(untyped, widened)
示例3: test_out_of_range_float
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_out_of_range_float(self):
    """Round-trip float32 extremes and reject infinity on write.

    A float32 frame is written and read back; the expected frame widens
    ``ColumnTooBig`` to float64 before the comparison. Setting one value
    to ``np.inf`` must then make ``to_stata`` raise ``ValueError`` with the
    documented message.
    """
    f32 = np.finfo(np.float32)
    original = DataFrame({
        'ColumnOk': [0.0, f32.eps, f32.max / 10.0],
        'ColumnTooBig': [0.0, f32.eps, f32.max],
    })
    original.index.name = 'index'
    for name in original.columns:
        original[name] = original[name].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        widened = original['ColumnTooBig'].astype(np.float64)
        original['ColumnTooBig'] = widened
        tm.assert_frame_equal(original, reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    msg = ("Column ColumnTooBig has a maximum value of infinity which"
           " is outside the range supported by Stata")
    with pytest.raises(ValueError, match=msg), tm.ensure_clean() as path:
        original.to_stata(path)
示例4: test_date_parsing_ignores_format_details
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_date_parsing_ignores_format_details(self, column):
    """Assert ``column`` and ``column + "_fmt"`` agree in the first row."""
    # GH 17797
    #
    # Display formats must be ignored when deciding whether a numeric
    # column holds date values.
    #
    # Every date type is stored as a number; the format attached to the
    # column encodes both the date type and how it is displayed.
    #
    # STATA supports 9 date types, each with distinct units. 7 of the 9 are
    # tested here; %tC and %tb are skipped. %tC is a variant of %tc that
    # accounts for leap seconds and %tb relies on STATA's business calendar.
    dates = read_stata(self.stata_dates)
    formatted_column = column + "_fmt"
    assert dates.loc[0, column] == dates.loc[0, formatted_column]
示例5: test_mixed_string_strl
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_mixed_string_strl(self):
    """Round-trip a string column containing ``None`` through a v117 file.

    The frame is written twice to the same temporary path: first with one
    long string and one ``None``, then with the whole column set to ``None``
    and written via ``convert_strl``. Each time the re-read frame must
    equal the written frame with missing values replaced by ``''``.
    """
    # GH 23633
    records = [
        {'mixed': 'string' * 500, 'number': 0},
        {'mixed': None, 'number': 1},
    ]
    output = pd.DataFrame(records)
    output['number'] = output['number'].astype('int32')

    with tm.ensure_clean() as path:
        output.to_stata(path, write_index=False, version=117)
        tm.assert_frame_equal(read_stata(path), output.fillna(''))

        # Check strl supports all None (null)
        output.loc[:, 'mixed'] = None
        output.to_stata(path, write_index=False, convert_strl=['mixed'],
                        version=117)
        tm.assert_frame_equal(read_stata(path), output.fillna(''))
示例6: setup_class
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def setup_class(cls, true, order, trend, error_cov_type, cov_type='approx',
                **kwargs):
    """Build the VARMAX model and smoothed results shared by the test class.

    Parameters
    ----------
    true : mapping
        Reference results; stored on ``cls.true``. Its ``'params'`` entry is
        passed to ``smooth``.
    order, trend, error_cov_type
        Forwarded unchanged to ``varmax.VARMAX``.
    cov_type : str, default 'approx'
        Forwarded to ``smooth``.
    **kwargs
        Extra keyword arguments forwarded to ``varmax.VARMAX``.
    """
    cls.true = true

    # NOTE(review): the original comment here read "1960:Q1 - 1982:Q4", but
    # the index built below is monthly ('MS') and the sample is sliced from
    # 1972-02-01 -- confirm the actual range of manufac.dta.
    # os.path.join avoids doubled or missing separators that manual
    # ``+ os.sep +`` concatenation produces for empty or slash-terminated
    # ``current_path`` values.
    data_path = os.path.join(current_path, 'results', 'manufac.dta')
    with open(data_path, 'rb') as test_data:
        dta = pd.read_stata(test_data)
    dta.index = pd.DatetimeIndex(dta.month, freq='MS')
    dta['dlncaputil'] = dta['lncaputil'].diff()
    dta['dlnhours'] = dta['lnhours'].diff()

    endog = dta.loc['1972-02-01':, ['dlncaputil', 'dlnhours']]

    # record=True captures warnings raised during model construction; the
    # captured list itself is not inspected.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter('always')
        cls.model = varmax.VARMAX(endog, order=order, trend=trend,
                                  error_cov_type=error_cov_type, **kwargs)

    cls.results = cls.model.smooth(true['params'], cov_type=cov_type)
示例7: test_encoding
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_encoding(self, version):
    """Check latin-1 decoding on read and a latin-1 write/read round trip.

    On Python 3 the value read with ``encoding="latin-1"`` must equal the
    value read without it. On Python 2 it must equal the raw value decoded
    as latin-1 and be a ``unicode`` instance.
    """
    # GH 4626, proper encoding handling
    raw = read_stata(self.dta_encoding)
    encoded = read_stata(self.dta_encoding, encoding="latin-1")
    result = encoded.kreis1849[0]

    if not compat.PY3:
        expected = raw.kreis1849.str.decode("latin-1")[0]
        assert result == expected
        assert isinstance(result, unicode)  # noqa
    else:
        expected = raw.kreis1849[0]
        assert result == expected
        assert isinstance(result, compat.string_types)

    with tm.ensure_clean() as tmp_path:
        encoded.to_stata(tmp_path, encoding='latin-1',
                         write_index=False, version=version)
        round_tripped = read_stata(tmp_path, encoding='latin-1')
        tm.assert_frame_equal(encoded, round_tripped)
示例8: test_out_of_range_float
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_out_of_range_float(self):
    """Round-trip float32 extremes and reject infinity on write.

    A float32 frame is written and read back; the expected frame widens
    ``ColumnTooBig`` to float64 before the comparison. Setting one value
    to ``np.inf`` must then make ``to_stata`` raise a ``ValueError`` whose
    message names the column and mentions infinity.
    """
    original = DataFrame({'ColumnOk': [0.0,
                                       np.finfo(np.float32).eps,
                                       np.finfo(np.float32).max / 10.0],
                          'ColumnTooBig': [0.0,
                                           np.finfo(np.float32).eps,
                                           np.finfo(np.float32).max]})
    original.index.name = 'index'
    for col in original:
        original[col] = original[col].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        original['ColumnTooBig'] = original['ColumnTooBig'].astype(
            np.float64)
        tm.assert_frame_equal(original,
                              reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    with pytest.raises(ValueError) as excinfo:
        with tm.ensure_clean() as path:
            original.to_stata(path)

    # pytest's ExceptionInfo exposes the raised exception as ``.value``
    # (``.exception`` is the unittest attribute). The checks run after the
    # ``raises`` block so they execute, and they search the message text
    # rather than the exception object.
    message = str(excinfo.value)
    assert 'ColumnTooBig' in message
    assert 'infinity' in message
示例9: read_stata
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def read_stata(filepath_or_buffer, convert_dates=True,
               convert_categoricals=True, encoding=None, index_col=None,
               convert_missing=False, preserve_dtypes=True, columns=None,
               order_categoricals=True, chunksize=None, iterator=False):
    """Read a Stata file, or return a reader for incremental access.

    Every option except ``iterator`` is forwarded to ``StataReader``.

    Returns
    -------
    The ``StataReader`` itself, still open, when ``iterator`` or
    ``chunksize`` is truthy. Otherwise the result of ``reader.read()``,
    with the reader closed before returning.
    """
    reader_options = dict(
        convert_dates=convert_dates,
        convert_categoricals=convert_categoricals,
        index_col=index_col,
        convert_missing=convert_missing,
        preserve_dtypes=preserve_dtypes,
        columns=columns,
        order_categoricals=order_categoricals,
        chunksize=chunksize,
        encoding=encoding,
    )
    reader = StataReader(filepath_or_buffer, **reader_options)

    # Incremental mode: the caller receives the open reader.
    if iterator or chunksize:
        return reader

    # One-shot mode: close the reader even if read() raises.
    try:
        return reader.read()
    finally:
        reader.close()
示例10: setup_class
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def setup_class(cls):
    """Run a LIML ``ivreg`` on ``data/auto.dta`` and store result/expected.

    NOTE(review): the previous docstring described the reference values as
    Stata output from `sysuse auto; reg price mpg`, but the call below is
    an ``ivreg`` with ``iv_method='liml'`` -- confirm which Stata command
    produced ``liml_std``.
    """
    cls.init(cls)

    # The data directory sits next to this test module.
    here = path.dirname(path.relpath(__file__))
    autodata = pd.read_stata(path.join(here, 'data', 'auto.dta'))

    dependent = 'price'
    endogenous = ['mpg', 'length']
    instruments = ['trunk', 'weight', 'headroom']
    exogenous = []
    cls.result = ivreg(autodata, dependent, endogenous, instruments,
                       exogenous,
                       addcons=True,
                       iv_method='liml',
                       nosingles=True)
    cls.expected = liml_std
示例11: parsed_114
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def parsed_114(dirpath):
    """Load ``stata5_114.dta`` from *dirpath* with its index named 'index'."""
    frame = read_stata(os.path.join(dirpath, 'stata5_114.dta'),
                       convert_dates=True)
    frame.index.name = 'index'
    return frame
示例12: read_dta
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def read_dta(self, file):
    """Read *file* via ``read_stata`` with ``convert_dates=True``."""
    # Legacy default reader configuration
    legacy_options = {'convert_dates': True}
    return read_stata(file, **legacy_options)
示例13: test_read_empty_dta
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_read_empty_dta(self, version):
    """Write a frame with one column and no rows, then read it back equal."""
    # GH 7369, make sure can read a 0-obs dta file
    written = DataFrame(columns=['unit'])
    with tm.ensure_clean() as tmp_path:
        written.to_stata(tmp_path, write_index=False, version=version)
        reloaded = read_stata(tmp_path)
        tm.assert_frame_equal(written, reloaded)
示例14: test_105
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_105(self):
    """Compare the first three rows of ``S4_EDUC1.dta`` with known values."""
    # Data obtained from:
    # http://go.worldbank.org/ZXY29PVJ21
    actual = pd.read_stata(os.path.join(self.dirpath, 'S4_EDUC1.dta'))

    expected = pd.DataFrame(
        [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]],
        columns=["clustnum", "pri_schl", "psch_num", "psch_dis"])
    column_dtypes = (
        ('clustnum', np.int16),
        ('pri_schl', np.int8),
        ('psch_num', np.int8),
        ('psch_dis', np.float32),
    )
    for name, dtype in column_dtypes:
        expected[name] = expected[name].astype(dtype)

    tm.assert_frame_equal(actual.head(3), expected)
示例15: test_drop_column
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_stata [as 别名]
def test_drop_column(self):
    """Exercise the ``columns=`` argument of ``read_stata``.

    Covers, in order: selecting a subset of columns, selecting the same
    columns in a different order, and the two ``ValueError`` paths
    (duplicate names and a name not present in the file).
    """
    # Expected frame: the CSV fixture cast to explicit NumPy dtypes.
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(datetime.strptime,
                                                    args=('%Y-%m-%d',))

    # Subset: only the three listed columns should come back.
    columns = ['byte_', 'int_', 'long_']
    expected = expected[columns]
    dropped = read_stata(self.dta15_117, convert_dates=True,
                         columns=columns)

    tm.assert_frame_equal(expected, dropped)

    # See PR 10757
    # Reorder: ``expected`` is re-sliced from the already-reduced frame, so
    # this step depends on the subset step above.
    columns = ['int_', 'long_', 'byte_']
    expected = expected[columns]
    reordered = read_stata(self.dta15_117, convert_dates=True,
                           columns=columns)
    tm.assert_frame_equal(expected, reordered)

    # Duplicate column names must be rejected.
    msg = "columns contains duplicate entries"
    with pytest.raises(ValueError, match=msg):
        columns = ['byte_', 'byte_']
        read_stata(self.dta15_117, convert_dates=True, columns=columns)

    # A requested column that is missing from the file must be rejected.
    msg = ("The following columns were not found in the Stata data set:"
           " not_found")
    with pytest.raises(ValueError, match=msg):
        columns = ['byte_', 'int_', 'long_', 'not_found']
        read_stata(self.dta15_117, convert_dates=True, columns=columns)