本文整理汇总了 Python 中 statsmodels.formula.api.ols 方法的典型用法代码示例。如果您正苦于以下问题:Python api.ols 方法的具体用法?Python api.ols 怎么用?Python api.ols 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 statsmodels.formula.api 的用法示例。
在下文中一共展示了api.ols方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_formula_predict_series
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_formula_predict_series():
    """Check the index of ``predict`` output for a formula-fitted OLS model.

    The model is fitted on ``y ~ x`` with ``y == x``, so every prediction
    equals its input value.  Four kinds of input are exercised: the training
    DataFrame, one of its columns, a new named Series with a different
    index, and a plain dict.
    """
    import pandas as pd
    # ``pandas.util.testing`` is deprecated and no longer available in
    # pandas 2.x; ``pandas.testing`` is the public home of these helpers.
    import pandas.testing as tm

    data = pd.DataFrame({"y": [1, 2, 3], "x": [1, 2, 3]}, index=[5, 3, 1])
    results = ols('y ~ x', data).fit()

    # DataFrame input: the prediction carries the frame's index.
    result = results.predict(data)
    expected = pd.Series([1., 2., 3.], index=[5, 3, 1])
    tm.assert_series_equal(result, expected)

    # A single column of the training data behaves the same way.
    result = results.predict(data.x)
    tm.assert_series_equal(result, expected)

    # A new Series named after the regressor: its own index is used.
    result = results.predict(pd.Series([1, 2, 3], index=[1, 2, 3], name='x'))
    expected = pd.Series([1., 2., 3.], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    # A dict has no index, so the expected result is indexed 0..n-1.
    result = results.predict({"x": [1, 2, 3]})
    expected = pd.Series([1., 2., 3.], index=[0, 1, 2])
    tm.assert_series_equal(result, expected)
示例2: test_patsy_lazy_dict
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_patsy_lazy_dict():
    """Fit and predict OLS with a dict-like container that resolves columns lazily.

    First pass: predictions on the lazy container must match the fitted
    values.  Second pass: one ``INCOME`` value is set to missing, and the
    prediction is expected to be one record shorter than the full dataset.
    """
    class LazyDict(dict):
        """Empty dict that looks columns up in the wrapped data on a miss."""

        def __init__(self, data):
            # Deliberately no dict contents: every key goes to __missing__.
            self.data = data

        def __missing__(self, key):
            # The column is converted on every lookup; nothing is stored
            # back into the dict.
            return np.array(self.data[key])

    data = cpunish.load_pandas().data
    data = LazyDict(data)
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()

    res2 = res.predict(data)
    npt.assert_allclose(res.fittedvalues, res2)

    data = cpunish.load_pandas().data
    # Assign through a single .loc call on the frame.  The chained form
    # ``data['INCOME'].loc[0] = None`` may write to a temporary copy and
    # leave ``data`` untouched (SettingWithCopy / copy-on-write).
    data.loc[0, 'INCOME'] = None

    data = LazyDict(data)
    data.index = cpunish.load_pandas().data.index
    res = ols('EXECUTIONS ~ SOUTH + INCOME', data=data).fit()

    res2 = res.predict(data)
    assert_equal(res.fittedvalues, res2)  # Should lose a record
    assert_equal(len(res2) + 1, len(cpunish.load_pandas().data))
示例3: test_results
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_results(self):
    """Compare a type II ANOVA table (robust="hc0") with stored reference values.

    The model is fitted on ``self.data`` without the rows labelled 0, 1
    and 2.  The ``df``, ``F`` and ``PR(>F)`` columns are checked.
    """
    trimmed = self.data.drop([0, 1, 2])
    fitted = ols(
        "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", trimmed
    ).fit()

    # Reference values, one entry per row of the ANOVA table; the trailing
    # NaNs in the F / p-value arrays belong to the last row.
    expected_sum_sq = np.array(
        [151.4065, 2.904723, 13.45718, 0.1905093, 27.60181]
    )
    expected_df = np.array([1, 2, 2, 51])
    expected_f = np.array([6.972744, 13.7804, 0.1709936, np.nan])
    expected_p = np.array([0.01095599, 1.641682e-05, 0.8433081, np.nan])

    table = anova_lm(fitted, typ="II", robust="hc0")

    np.testing.assert_equal(table['df'].values, expected_df)
    # NOTE: the 'sum_sq' column is currently not asserted.
    # np.testing.assert_almost_equal(table['sum_sq'].values, expected_sum_sq, 4)
    np.testing.assert_almost_equal(table['F'].values, expected_f, 4)
    np.testing.assert_almost_equal(table['PR(>F)'].values, expected_p)
示例4: test_formula_missing_cat
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_formula_missing_cat():
    """Regression test for gh-805: a missing value in the 'firm' column.

    Fitting on the frame with the NaN row must give the same parameters as
    fitting on ``dropna()``-ed data, and ``missing='raise'`` must raise a
    ``PatsyError``.
    """
    import statsmodels.api as sm
    from statsmodels.formula.api import ols
    from patsy import PatsyError

    spec = 'value ~ invest + capital + firm + year'

    grunfeld = sm.datasets.grunfeld.load_pandas().data
    # Blank out 'firm' in the first row only.
    first_label = grunfeld.index[0]
    grunfeld.loc[first_label, 'firm'] = np.nan

    # Reference fit: rows with missing values removed by hand.
    res_clean = ols(formula=spec, data=grunfeld.dropna()).fit()
    # Fit under test: missing handling left to the formula machinery.
    res_auto = ols(formula=spec, data=grunfeld).fit()

    assert_almost_equal(res_clean.params.values, res_auto.params.values)

    assert_raises(PatsyError, ols, spec, data=grunfeld, missing='raise')
示例5: anova
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def anova(data,formula):
    """Run an analysis of variance for an OLS model given by a formula.

    Parameters
    ----------
    data : DataFrame
        Contains the numeric variable(s) and the categorical variable(s).
    formula : str
        Relationship between the variables, e.g.
        ``numeric ~ C(cat1) [+ C(cat2) [+ C(cat1):C(cat2)]]``.

    Returns
    -------
    The ANOVA table.  Total variance is split into within-group and
    between-group variance, and their ratio is used to judge whether the
    groups differ.  Columns:

    - ``df``: degrees of freedom
    - ``sum_sq``: sum of squared errors
    - ``mean_sq``: ``sum_sq`` divided by the matching degrees of freedom
    - ``F``: ratio of the ``mean_sq`` values
    - ``PR(>F)``: p-value; e.g. below 0.05 indicates a significant difference
    """
    import statsmodels.api as sm
    from statsmodels.formula.api import ols

    # Categorical predictors are marked with C(...) inside the formula.
    fitted_model = ols(formula, data=data).fit()
    return sm.stats.anova_lm(fitted_model)
示例6: test_statsmodels
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_statsmodels():
    """Smoke test: fit a formula-based OLS model on the Guerry dataset.

    Nothing is asserted; the test passes when the fit completes without
    raising.
    """
    # NOTE(review): import_module is a helper defined elsewhere; presumably
    # it guards against statsmodels being unavailable - confirm.
    statsmodels = import_module('statsmodels')  # noqa
    import statsmodels.api as sm
    import statsmodels.formula.api as smf

    guerry = sm.datasets.get_rdataset("Guerry", "HistData").data
    model = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=guerry)
    model.fit()
# Cython import warning
示例7: initialize
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def initialize(cls):
    """Fit the OLS model under test on ``cls.data`` and store it as ``cls.res``.

    The formula crosses ``Duration`` and ``Weight`` (both with ``Sum``
    coding) against ``log(Days + 1)``.
    """
    # Only ``ols`` is used in this method.
    from statsmodels.formula.api import ols

    mod = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
    # NOTE(review): use_t=False presumably switches inference away from the
    # t/F distributions - confirm against the statsmodels fit() docs.
    cls.res = mod.fit(use_t=False)
示例8: setup_class
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def setup_class(cls):
    """Prepare the fitted model, term name and constraints for 'C(Weight)'.

    Sets ``cls.data``, ``cls.res``, ``cls.term_name`` and
    ``cls.constraints``.
    """
    from statsmodels.formula.api import ols
    import statsmodels.stats.tests.test_anova as ttmod

    # Reuse the dataset that the ANOVA test class sets up for itself.
    anova_case = ttmod.TestAnova3()
    anova_case.setup_class()
    cls.data = anova_case.data.drop([0, 1, 2])

    # Additive model: no Duration x Weight interaction term.
    cls.res = ols("np.log(Days+1) ~ C(Duration) + C(Weight)", cls.data).fit()

    cls.term_name = "C(Weight)"
    # The two 'C(Weight)' level coefficients plus their difference.
    cls.constraints = [
        'C(Weight)[T.2]',
        'C(Weight)[T.3]',
        'C(Weight)[T.3] - C(Weight)[T.2]',
    ]
示例9: test_one_column_exog
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_one_column_exog(self):
    """Smoke test ``plot_regress_exog`` for models with a single regressor.

    Runs once without an intercept ("y~var1-1") and once with the default
    intercept ("y~var1"); each figure is closed right after it is created.
    """
    from statsmodels.formula.api import ols

    for spec in ("y~var1-1", "y~var1"):
        fitted = ols(spec, data=self.data).fit()
        figure = plot_regress_exog(fitted, "var1")
        plt.close(figure)
示例10: setup_class
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def setup_class(cls):
    """Build the formula-based OLS model, then run the parent class setup.

    ``cls.model`` is assigned before delegating to the parent's
    ``setup_class``.
    """
    cls.model = ols(longley_formula, load_pandas().data)
    super(TestFormulaPandas, cls).setup_class()
示例11: test_tests
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_tests():
    """Check the matrices built from a string of linear hypotheses.

    Three hypotheses are parsed against a seven-parameter OLS fit
    (intercept plus six regressors).  The coefficient matrix and the
    constants vector are compared with hand-written expectations.
    """
    model_spec = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
    longley = load_pandas().data
    fitted = ols(model_spec, longley).fit()

    hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
    LC = make_hypotheses_matrices(fitted, hypotheses)
    coefs = LC.coefs
    constants = LC.constants

    # One row per hypothesis; columns follow the parameter order
    # Intercept, GNPDEFL, GNP, UNEMP, ARMED, POP, YEAR.
    expected_coefs = [
        [0, 1, -1, 0, 0, 0, 0],          # GNPDEFL - GNP = 0
        [0, 0, 0, 1, 0, 0, 0],           # UNEMP = 2
        [0, 0, 0, 0, 0, 0, 1./1829],     # YEAR / 1829 = 1
    ]
    expected_constants = [[0], [2], [1]]

    npt.assert_almost_equal(coefs, expected_coefs, 8)
    npt.assert_array_equal(constants, expected_constants)
示例12: test_formula_labels
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_formula_labels():
    """Make sure row labels pass through patsy as expected.

    The fitted values of a formula model must carry the same index as the
    input frame.  The data is the Duncan dataset from the R package car.
    """
    from pandas import read_table

    # The header names four columns while each row has five fields.
    # NOTE(review): presumably pandas then uses the first field (the
    # occupation name) as the row index, which this test relies on - confirm.
    raw = StringIO(""""type" "income" "education" "prestige"\n"accountant" "prof" 62 86 82\n"pilot" "prof" 72 76 83\n"architect" "prof" 75 92 90\n"author" "prof" 55 90 76\n"chemist" "prof" 64 86 90\n"minister" "prof" 21 84 87\n"professor" "prof" 64 93 93\n"dentist" "prof" 80 100 90\n"reporter" "wc" 67 87 52\n"engineer" "prof" 72 86 88\n"undertaker" "prof" 42 74 57\n"lawyer" "prof" 76 98 89\n"physician" "prof" 76 97 97\n"welfare.worker" "prof" 41 84 59\n"teacher" "prof" 48 91 73\n"conductor" "wc" 76 34 38\n"contractor" "prof" 53 45 76\n"factory.owner" "prof" 60 56 81\n"store.manager" "prof" 42 44 45\n"banker" "prof" 78 82 92\n"bookkeeper" "wc" 29 72 39\n"mail.carrier" "wc" 48 55 34\n"insurance.agent" "wc" 55 71 41\n"store.clerk" "wc" 29 50 16\n"carpenter" "bc" 21 23 33\n"electrician" "bc" 47 39 53\n"RR.engineer" "bc" 81 28 67\n"machinist" "bc" 36 32 57\n"auto.repairman" "bc" 22 22 26\n"plumber" "bc" 44 25 29\n"gas.stn.attendant" "bc" 15 29 10\n"coal.miner" "bc" 7 7 15\n"streetcar.motorman" "bc" 42 26 19\n"taxi.driver" "bc" 9 19 10\n"truck.driver" "bc" 21 15 13\n"machine.operator" "bc" 21 20 24\n"barber" "bc" 16 26 20\n"bartender" "bc" 16 28 7\n"shoe.shiner" "bc" 9 17 3\n"cook" "bc" 14 22 16\n"soda.clerk" "bc" 12 30 6\n"watchman" "bc" 17 25 11\n"janitor" "bc" 7 20 8\n"policeman" "bc" 34 47 41\n"waiter" "bc" 8 32 10""")
    duncan = read_table(raw, sep=" ")

    fitted = ols("prestige ~ income + education", duncan).fit()
    assert_equal(fitted.fittedvalues.index, duncan.index)
示例13: test_formula_predict
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_formula_predict():
    """Predicting on the dataset's exog must reproduce the fitted values.

    The formula applies ``log`` to two regressors, so ``predict`` has to
    re-apply those transforms to the raw exog columns.
    """
    # Not unused: the formula string below refers to ``log`` by name, so it
    # has to be resolvable from this function's scope.
    from numpy import log

    formula = (
        "TOTEMP ~ log(GNPDEFL) + log(GNP) + UNEMP + ARMED +\n"
        "POP + YEAR"
    )
    dataset = load_pandas()
    frame = load_pandas().data

    fitted = ols(formula, frame).fit()
    predicted = fitted.predict(dataset.exog)
    npt.assert_almost_equal(fitted.fittedvalues.values, predicted, 8)
示例14: test_compare_OLS
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def test_compare_OLS(self):
    """Gaussian GEE with independence correlation must match OLS.

    Parameter estimates, naive standard errors and the t-values derived
    from the naive covariance are compared to 10 decimals on random data.
    """
    vs = Independence()
    family = Gaussian()

    # Four independent standard-normal columns, drawn in the order
    # Y, X1, X2, X3.
    columns = {
        name: np.random.normal(size=100)
        for name in ("Y", "X1", "X2", "X3")
    }
    D = pd.DataFrame(columns)

    # 20 groups of 5 consecutive observations each.
    groups = np.kron(lrange(20), np.ones(5))

    gee_model = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
                                 family=family, cov_struct=vs)
    mdf = gee_model.fit()

    # Work on the unwrapped results: the assert helpers do not work with
    # the results wrapper.
    ols_res = smf.ols("Y ~ X1 + X2 + X3", data=D).fit()._results

    assert_almost_equal(ols_res.params, mdf.params, decimal=10)

    naive_se = mdf.standard_errors(cov_type="naive")
    assert_almost_equal(ols_res.bse, naive_se, decimal=10)

    naive_tvalues = mdf.params / np.sqrt(np.diag(mdf.cov_naive))
    assert_almost_equal(naive_tvalues, ols_res.tvalues, decimal=10)
示例15: setup_class
# 需要导入模块: from statsmodels.formula import api [as 别名]
# 或者: from statsmodels.formula.api import ols [as 别名]
def setup_class(cls):
    """Store the kidney dataset and an OLS fit with full interaction on the class.

    Sets ``cls.data`` and ``cls.kidney_lm``.
    """
    # Kidney data taken from JT's course; its license is unknown.
    cls.data = kidney_table

    model = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', data=cls.data)
    cls.kidney_lm = model.fit()