本文整理汇总了 Python 中 statsmodels.regression.linear_model.OLS.from_formula 方法的典型用法代码示例。如果您正苦于以下问题:Python OLS.from_formula 方法的具体用法?Python OLS.from_formula 怎么用?Python OLS.from_formula 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 statsmodels.regression.linear_model.OLS 的用法示例。
在下文中一共展示了OLS.from_formula方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_from_formula
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def test_from_formula():
    """Check that a formula-built RecursiveLS fit has the same params as OLS."""
    formula = 'cpi ~ m1'

    rls_res = RecursiveLS.from_formula(formula, data=dta).fit()

    # The recursive least squares estimates are compared with a plain
    # OLS fit of the same formula on the same data.
    ols_res = OLS.from_formula(formula, data=dta).fit()

    assert_allclose(rls_res.params, ols_res.params)
示例2: test_from_formula
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def test_from_formula():
    """Build RecursiveLS from a formula under a warning check; compare to OLS."""
    formula = 'cpi ~ m1'

    # NOTE(review): the listing lost its indentation; only the model
    # construction is assumed to belong inside the warning check --
    # confirm against the upstream test.
    with pytest.warns(ValueWarning, match="No frequency information"):
        rls_model = RecursiveLS.from_formula(formula, data=dta)
    rls_res = rls_model.fit()

    # The recursive least squares estimates are compared with a plain
    # OLS fit of the same formula on the same data.
    ols_res = OLS.from_formula(formula, data=dta).fit()

    assert_allclose(rls_res.params, ols_res.params)
示例3: setup_class
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def setup_class(cls):
    """Fit GLM and OLS on the same formula and keep both influence objects.

    The data come from ``get_duncan_data``; ``cls.infl0`` holds the GLM
    influence results and ``cls.infl1`` the OLS ones.
    """
    from .test_diagnostic import get_duncan_data

    endog, exog, labels = get_duncan_data()
    column_names = 'y const var1 var2'.split()
    frame = pd.DataFrame(
        np.column_stack((endog, exog)),
        columns=column_names,
        index=labels,
    )

    # "- 1" drops the implicit intercept; the explicit ``const`` column
    # is listed as a regressor instead.
    formula = 'y ~ const + var1 + var2 - 1'
    glm_res = GLM.from_formula(formula, frame).fit()
    ols_res = OLS.from_formula(formula, frame).fit()

    cls.infl1 = ols_res.get_influence()
    cls.infl0 = glm_res.get_influence()
示例4: setup_class
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def setup_class(cls):
    """Create a seeded random data set and store an OLS fit on the class.

    Sets ``cls.data`` (columns ``y``, ``var1``, ``var2``, ``var3``, values
    rounded to 4 decimals) and ``cls.res`` (fit of ``y ~ var1 + var2``).
    """
    nobs = 30
    np.random.seed(987128)

    # Draw order matters for reproducibility: regressors first, then noise.
    regressors = np.random.randn(nobs, 3)
    response = regressors.sum(1) + np.random.randn(nobs)

    row_labels = ['obs%02d' % k for k in range(nobs)]
    values = np.round(np.column_stack((response, regressors)), 4)

    # var3 is an extra column that the formula below does not use; it is
    # there to check that its presence doesn't matter.
    cls.data = pd.DataFrame(
        values,
        columns='y var1 var2 var3'.split(),
        index=row_labels,
    )
    cls.res = OLS.from_formula('y ~ var1 + var2', data=cls.data).fit()
示例5: test_missing_formula_predict
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def test_missing_formula_predict():
    """Fit a formula OLS on data ending in a NaN row, then call predict.

    The test makes no assertion on the predictions; it only exercises
    ``predict`` with the rows that precede the NaN observation.
    """
    # see 2171
    nsample = 30

    observed = pandas.DataFrame({'x': np.linspace(0, 10, nsample)})
    missing_row = pandas.DataFrame({'x': np.array([np.nan])})
    data = pandas.concat([observed, missing_row])

    beta = np.array([1, 0.1])
    noise = np.random.normal(size=nsample+1)
    data['y'] = beta[0] + beta[1] * data['x'] + noise

    fitted = OLS.from_formula('y ~ x', data=data).fit()

    # Predict on everything except the trailing row with the missing x.
    predicted = fitted.predict(exog=data[:-1])
示例6: env_corr
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def env_corr(self, env_vars, coeff_plot=False, qq_plot=False):
    """
    Determine correlations with environmental/non-discretionary variables
    using an ordinary least squares (OLS) regression of ``Efficiency`` on
    the columns of ``env_vars``. Tobit will be implemented when available
    upstream in statsmodels.

    The regression summary is printed before returning.

    Takes:
        env_vars: A pandas dataframe of environmental variables
        coeff_plot: if True, draw seaborn's ``coefplot`` for the same
                    formula and data
        qq_plot: if True, draw a Q-Q plot of the regression residuals
    Returns:
        corr_res: the statsmodels results instance obtained by fitting
                  the OLS model.

    Note that there can be no spaces in the variables' names, because
    they are joined into a formula string.
    """
    from statsmodels.regression.linear_model import OLS

    env_data = _to_dataframe(env_vars)
    corr_data = env_data.join(self['Efficiency'])

    # Build the formula once so the model and the coefficient plot are
    # guaranteed to use the same specification.
    formula = "Efficiency ~ " + " + ".join(env_vars.columns)
    corr_res = OLS.from_formula(formula, corr_data).fit()

    # Plotting libraries are imported only when a plot is requested, so
    # the regression itself does not depend on them being importable.
    if coeff_plot or qq_plot:
        import matplotlib.pyplot as plt

    # plot coeffs
    if coeff_plot:
        # NOTE(review): ``coefplot`` is not present in every seaborn
        # release; importing it lazily keeps the non-plotting path usable.
        from seaborn import coefplot
        coefplot(formula, data=corr_data)
        plt.xticks(rotation=45, ha='right')
        plt.title('Regression coefficients and standard errors')

    # plot qq of residuals
    if qq_plot:
        from statsmodels.graphics.gofplots import qqplot
        qqplot(corr_res.resid, line='s')
        plt.title('Distribution of residuals')

    print(corr_res.summary())
    return corr_res
示例7: test_outlier_test
# 需要导入模块: from statsmodels.regression.linear_model import OLS [as 别名]
# 或者: from statsmodels.regression.linear_model.OLS import from_formula [as 别名]
def test_outlier_test():
    """Compare ``outlier_test`` output with hard-coded reference values.

    The check is run for an OLS fit on plain arrays and for a fit built
    from a formula on a labelled DataFrame, both through the ``oi``
    function and through the results method.
    """
    endog, exog, labels = get_duncan_data()
    ndarray_mod = OLS(endog, exog).fit()

    # Reference columns, already ordered the way ``order=True`` returns them.
    expected_rstudent = [
        3.1345185839, -2.3970223990, 2.0438046359, -1.9309187757,
        1.8870465798, -1.7604905300, -1.7040324156, 1.6024285876,
        -1.4332485037, -1.1044851583, 1.0688582315, 1.0185271840,
        -0.9024219332, -0.9023876471, -0.8830953936, 0.8265782334,
        0.8089220547, 0.7682770197, 0.7319491074, -0.6665962829,
        0.5227352794, -0.5135016547, 0.5083881518, 0.4999224372,
        -0.4980818221, -0.4759717075, -0.4293565820, -0.4114056499,
        -0.3779540862, 0.3556874030, 0.3409200462, 0.3062248646,
        0.3038999429, -0.3030815773, -0.1873387893, 0.1738050251,
        0.1424246593, -0.1292266025, 0.1272066463, -0.0798902878,
        0.0788467222, 0.0722556991, 0.0505098280, 0.0233215136,
        0.0007112055]

    expected_unadj_p = [
        0.003177202, 0.021170298, 0.047432955, 0.060427645, 0.066248120,
        0.085783008, 0.095943909, 0.116738318, 0.159368890, 0.275822623,
        0.291386358, 0.314400295, 0.372104049, 0.372122040, 0.382333561,
        0.413260793, 0.423229432, 0.446725370, 0.468363101, 0.508764039,
        0.603971990, 0.610356737, 0.613905871, 0.619802317, 0.621087703,
        0.636621083, 0.669911674, 0.682917818, 0.707414459, 0.723898263,
        0.734904667, 0.760983108, 0.762741124, 0.763360242, 0.852319039,
        0.862874018, 0.887442197, 0.897810225, 0.899398691, 0.936713197,
        0.937538115, 0.942749758, 0.959961394, 0.981506948, 0.999435989]

    raw_bonf_p = [
        0.1429741, 0.9526634, 2.1344830, 2.7192440, 2.9811654, 3.8602354,
        4.3174759, 5.2532243, 7.1716001, 12.4120180, 13.1123861, 14.1480133,
        16.7446822, 16.7454918, 17.2050103, 18.5967357, 19.0453245,
        20.1026416, 21.0763395, 22.8943818, 27.1787396, 27.4660532,
        27.6257642, 27.8911043, 27.9489466, 28.6479487, 30.1460253,
        30.7313018, 31.8336506, 32.5754218, 33.0707100, 34.2442399,
        34.3233506, 34.3512109, 38.3543568, 38.8293308, 39.9348989,
        40.4014601, 40.4729411, 42.1520939, 42.1892152, 42.4237391,
        43.1982627, 44.1678127, 44.9746195]
    # Values above 1 in the reference list are capped at 1 before comparing.
    expected_bonf_p = np.minimum(np.array(raw_bonf_p), 1)

    sorted_labels = [
        "minister", "reporter", "contractor", "insurance.agent",
        "machinist", "store.clerk", "conductor", "factory.owner",
        "mail.carrier", "streetcar.motorman", "carpenter", "coal.miner",
        "bartender", "bookkeeper", "soda.clerk", "chemist", "RR.engineer",
        "professor", "electrician", "gas.stn.attendant", "auto.repairman",
        "watchman", "banker", "machine.operator", "dentist", "waiter",
        "shoe.shiner", "welfare.worker", "plumber", "physician", "pilot",
        "engineer", "accountant", "lawyer", "undertaker", "barber",
        "store.manager", "truck.driver", "cook", "janitor", "policeman",
        "architect", "teacher", "taxi.driver", "author"]

    expected = np.column_stack(
        (expected_rstudent, expected_unadj_p, expected_bonf_p))

    # Array-based fit: labels are passed in explicitly.
    res = oi.outlier_test(ndarray_mod, method='b', labels=labels, order=True)
    np.testing.assert_almost_equal(res.values, expected, 7)
    np.testing.assert_equal(res.index.tolist(), sorted_labels)  # pylint: disable-msg=E1103

    frame = pd.DataFrame(
        np.column_stack((endog, exog)),
        columns='y const var1 var2'.split(),
        index=labels,
    )

    # check `order` with pandas bug in #3971
    res_pd = OLS.from_formula('y ~ const + var1 + var2 - 0', frame).fit()

    via_function = oi.outlier_test(res_pd, method='b', order=True)
    assert_almost_equal(via_function.values, expected, 7)
    assert_equal(via_function.index.tolist(), sorted_labels)

    # Unordered result from the method, sorted by hand on unadj_p, has to
    # agree with the ordered result above.
    via_method = res_pd.outlier_test(method='b').sort_values(
        ['unadj_p'], ascending=True)
    assert_almost_equal(via_method.values, expected, 7)
    assert_equal(via_method.index.tolist(), sorted_labels)
    assert_array_equal(via_function.index, via_method.index)

    # additional keywords in method
    ordered = res_pd.outlier_test(method='b', order=True)
    assert_equal(ordered.index.tolist(), sorted_labels)

    # With cutoff=0.15 only the first sorted label is expected to remain.
    trimmed = res_pd.outlier_test(method='b', order=True, cutoff=0.15)
    assert_equal(trimmed.index.tolist(), sorted_labels[:1])