本文整理汇总了Python中statsmodels.duration.hazard_regression.PHReg类的典型用法代码示例。如果您正苦于以下问题:Python PHReg类的具体用法?Python PHReg怎么用?Python PHReg使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PHReg类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_fit_regularized
def test_fit_regularized(self):
# Data set sizes
for n,p in (50,2),(100,5):
# Penalty weights
for js,s in enumerate([0,0.1]):
coef_name = "coef_%d_%d_%d" % (n, p, js)
coef = getattr(survival_enet_r_results, coef_name)
fname = "survival_data_%d_%d.csv" % (n, p)
time, status, entry, exog = self.load_file(fname)
exog -= exog.mean(0)
exog /= exog.std(0, ddof=1)
mod = PHReg(time, exog, status=status, ties='breslow')
rslt = mod.fit_regularized(alpha=s)
# The agreement isn't very high, the issue may be on
# their side. They seem to use some approximations
# that we are not using.
assert_allclose(rslt.params, coef, rtol=0.3)
# Smoke test for summary
smry = rslt.summary()
示例2: test_formula
def test_formula(self):
np.random.seed(34234)
time = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200,4))
entry = np.zeros_like(time)
entry[0:10] = time[0:10] / 2
df = pd.DataFrame({"time": time, "status": status,
"exog1": exog[:, 0], "exog2": exog[:, 1],
"exog3": exog[:, 2], "exog4": exog[:, 3],
"entry": entry})
mod1 = PHReg(time, exog, status, entry=entry)
rslt1 = mod1.fit()
fml = "time ~ 0 + exog1 + exog2 + exog3 + exog4"
mod2 = PHReg.from_formula(fml, df, status=status,
entry=entry)
rslt2 = mod2.fit()
mod3 = PHReg.from_formula(fml, df, status="status",
entry="entry")
rslt3 = mod3.fit()
assert_allclose(rslt1.params, rslt2.params)
assert_allclose(rslt1.params, rslt3.params)
assert_allclose(rslt1.bse, rslt2.bse)
assert_allclose(rslt1.bse, rslt3.bse)
示例3: test_fit_regularized
def test_fit_regularized(self):
# Data set sizes
for n,p in (50,2),(100,5):
# Penalty weights
for js,s in enumerate([0,0.1]):
coef_name = "coef_%d_%d_%d" % (n, p, js)
params = getattr(survival_enet_r_results, coef_name)
fname = "survival_data_%d_%d.csv" % (n, p)
time, status, entry, exog = self.load_file(fname)
exog -= exog.mean(0)
exog /= exog.std(0, ddof=1)
model = PHReg(time, exog, status=status, ties='breslow')
sm_result = model.fit_regularized(alpha=s)
# The agreement isn't very high, the issue may be on
# the R side. See below for further checks.
assert_allclose(sm_result.params, params, rtol=0.3)
# The penalized log-likelihood that we are maximizing.
def plf(params):
llf = model.loglike(params) / len(time)
L1_wt = 1
llf = llf - s * ((1 - L1_wt)*np.sum(params**2) / 2 + L1_wt*np.sum(np.abs(params)))
return llf
# Confirm that we are doing better than glmnet.
llf_r = plf(params)
llf_sm = plf(sm_result.params)
assert_equal(np.sign(llf_sm - llf_r), 1)
示例4: test_summary
def test_summary(self):
# smoke test
np.random.seed(34234)
time = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200,4))
mod = PHReg(time, exog, status)
rslt = mod.fit()
rslt.summary()
示例5: test_summary
def test_summary(self):
# smoke test
np.random.seed(34234)
time = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200,4))
mod = PHReg(time, exog, status)
rslt = mod.fit()
smry = rslt.summary()
strata = np.kron(np.arange(50), np.ones(4))
mod = PHReg(time, exog, status, strata=strata)
rslt = mod.fit()
smry = rslt.summary()
msg = "3 strata dropped for having no events"
assert_(msg in str(smry))
groups = np.kron(np.arange(25), np.ones(8))
mod = PHReg(time, exog, status)
rslt = mod.fit(groups=groups)
smry = rslt.summary()
entry = np.random.uniform(0.1, 0.8, 200) * time
mod = PHReg(time, exog, status, entry=entry)
rslt = mod.fit()
smry = rslt.summary()
msg = "200 observations have positive entry times"
assert_(msg in str(smry))
示例6: test_offset
def test_offset(self):
np.random.seed(34234)
time = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200,4))
mod1 = PHReg(time, exog, status)
rslt1 = mod1.fit()
offset = exog[:,0] * rslt1.params[0]
exog = exog[:, 1:]
mod2 = PHReg(time, exog, status, offset=offset)
rslt2 = mod2.fit()
assert_allclose(rslt2.params, rslt1.params[1:])
示例7: test_predict_formula
def test_predict_formula(self):
n = 100
np.random.seed(34234)
time = 50 * np.random.uniform(size=n)
status = np.random.randint(0, 2, n).astype(np.float64)
exog = np.random.uniform(1, 2, size=(n, 2))
df = pd.DataFrame({"time": time, "status": status,
"exog1": exog[:, 0], "exog2": exog[:, 1]})
fml = "time ~ 0 + exog1 + np.log(exog2) + exog1*exog2"
model1 = PHReg.from_formula(fml, df, status=status)
result1 = model1.fit()
from patsy import dmatrix
dfp = dmatrix(model1.data.design_info.builder, df)
pr1 = result1.predict()
pr2 = result1.predict(exog=df)
pr3 = model1.predict(result1.params, exog=dfp) # No standard errors
pr4 = model1.predict(result1.params, cov_params=result1.cov_params(), exog=dfp)
prl = (pr1, pr2, pr3, pr4)
for i in range(4):
for j in range(i):
assert_allclose(prl[i].predicted_values, prl[j].predicted_values)
prl = (pr1, pr2, pr4)
for i in range(3):
for j in range(i):
assert_allclose(prl[i].standard_errors, prl[j].standard_errors)
示例8: test_predict
def test_predict(self):
# All smoke tests. We should be able to convert the lhr and hr
# tests into real tests against R. There are many options to
# this function that may interact in complicated ways. Only a
# few key combinations are tested here.
np.random.seed(34234)
endog = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200, 4))
mod = PHReg(endog, exog, status)
rslt = mod.fit()
rslt.predict()
for pred_type in "lhr", "hr", "cumhaz", "surv":
rslt.predict(pred_type=pred_type)
rslt.predict(endog=endog[0:10], pred_type=pred_type)
rslt.predict(endog=endog[0:10], exog=exog[0:10, :], pred_type=pred_type)
示例9: do1
def do1(self, fname, ties, entry_f, strata_f):
# Read the test data.
time, status, entry, exog = self.load_file(fname)
n = len(time)
vs = fname.split("_")
n = int(vs[2])
p = int(vs[3].split(".")[0])
ties1 = ties[0:3]
# Needs to match the kronecker statement in survival.R
strata = np.kron(range(5), np.ones(n // 5))
# No stratification or entry times
mod = PHReg(time, exog, status, ties=ties)
phrb = mod.fit(**args)
coef_r, se_r, time_r, hazard_r = get_results(n, p, None, ties1)
assert_allclose(phrb.params, coef_r, rtol=1e-3)
assert_allclose(phrb.bse, se_r, rtol=1e-4)
time_h, cumhaz, surv = phrb.baseline_cumulative_hazard[0]
# Entry times but no stratification
phrb = PHReg(time, exog, status, entry=entry,
ties=ties).fit(**args)
coef, se, time_r, hazard_r = get_results(n, p, "et", ties1)
assert_allclose(phrb.params, coef, rtol=1e-3)
assert_allclose(phrb.bse, se, rtol=1e-3)
# Stratification but no entry times
phrb = PHReg(time, exog, status, strata=strata,
ties=ties).fit(**args)
coef, se, time_r, hazard_r = get_results(n, p, "st", ties1)
assert_allclose(phrb.params, coef, rtol=1e-4)
assert_allclose(phrb.bse, se, rtol=1e-4)
# Stratification and entry times
phrb = PHReg(time, exog, status, entry=entry,
strata=strata, ties=ties).fit(**args)
coef, se, time_r, hazard_r = get_results(n, p, "et_st", ties1)
assert_allclose(phrb.params, coef, rtol=1e-3)
assert_allclose(phrb.bse, se, rtol=1e-4)
#smoke test
time_h, cumhaz, surv = phrb.baseline_cumulative_hazard[0]
示例10: test_get_distribution
def test_get_distribution(self):
# Smoke test
np.random.seed(34234)
exog = np.random.normal(size=(200, 2))
lin_pred = exog.sum(1)
elin_pred = np.exp(-lin_pred)
time = -elin_pred * np.log(np.random.uniform(size=200))
mod = PHReg(time, exog)
rslt = mod.fit()
dist = rslt.get_distribution()
fitted_means = dist.mean()
true_means = elin_pred
fitted_var = dist.var()
fitted_sd = dist.std()
sample = dist.rvs()
示例11: test_formula_args
def test_formula_args(self):
np.random.seed(34234)
n = 200
time = 50 * np.random.uniform(size=n)
status = np.random.randint(0, 2, size=n).astype(np.float64)
exog = np.random.normal(size=(200, 2))
offset = np.random.uniform(size=n)
entry = np.random.uniform(0, 1, size=n) * time
df = pd.DataFrame({"time": time, "status": status, "x1": exog[:, 0],
"x2": exog[:, 1], "offset": offset, "entry": entry})
model1 = PHReg.from_formula("time ~ x1 + x2", status="status", offset="offset",
entry="entry", data=df)
result1 = model1.fit()
model2 = PHReg.from_formula("time ~ x1 + x2", status=df.status, offset=df.offset,
entry=df.entry, data=df)
result2 = model2.fit()
assert_allclose(result1.params, result2.params)
assert_allclose(result1.bse, result2.bse)
示例12: test_get_distribution
def test_get_distribution(self):
np.random.seed(34234)
n = 200
exog = np.random.normal(size=(n, 2))
lin_pred = exog.sum(1)
elin_pred = np.exp(-lin_pred)
time = -elin_pred * np.log(np.random.uniform(size=n))
status = np.ones(n)
status[0:20] = 0
strata = np.kron(range(5), np.ones(n // 5))
mod = PHReg(time, exog, status=status, strata=strata)
rslt = mod.fit()
dist = rslt.get_distribution()
fitted_means = dist.mean()
true_means = elin_pred
fitted_var = dist.var()
fitted_sd = dist.std()
sample = dist.rvs()
示例13: test_formula_cat_interactions
def test_formula_cat_interactions(self):
time = np.r_[1, 2, 3, 4, 5, 6, 7, 8, 9]
status = np.r_[1, 1, 0, 0, 1, 0, 1, 1, 1]
x1 = np.r_[1, 1, 1, 2, 2, 2, 3, 3, 3]
x2 = np.r_[1, 2, 3, 1, 2, 3, 1, 2, 3]
df = pd.DataFrame({"time": time, "status": status,
"x1": x1, "x2": x2})
model1 = PHReg.from_formula("time ~ C(x1) + C(x2) + C(x1)*C(x2)", status="status",
data=df)
assert_equal(model1.exog.shape, [9, 8])
示例14: test_post_estimation
def test_post_estimation(self):
# All regression tests
np.random.seed(34234)
time = 50 * np.random.uniform(size=200)
status = np.random.randint(0, 2, 200).astype(np.float64)
exog = np.random.normal(size=(200,4))
mod = PHReg(time, exog, status)
rslt = mod.fit()
mart_resid = rslt.martingale_residuals
assert_allclose(np.abs(mart_resid).sum(), 120.72475743348433)
w_avg = rslt.weighted_covariate_averages
assert_allclose(np.abs(w_avg[0]).sum(0),
np.r_[7.31008415, 9.77608674,10.89515885, 13.1106801])
bc_haz = rslt.baseline_cumulative_hazard
v = [np.mean(np.abs(x)) for x in bc_haz[0]]
w = np.r_[23.482841556421608, 0.44149255358417017,
0.68660114081275281]
assert_allclose(v, w)
score_resid = rslt.score_residuals
v = np.r_[ 0.50924792, 0.4533952, 0.4876718, 0.5441128]
w = np.abs(score_resid).mean(0)
assert_allclose(v, w)
groups = np.random.randint(0, 3, 200)
mod = PHReg(time, exog, status)
rslt = mod.fit(groups=groups)
robust_cov = rslt.cov_params()
v = [0.00513432, 0.01278423, 0.00810427, 0.00293147]
w = np.abs(robust_cov).mean(0)
assert_allclose(v, w, rtol=1e-6)
s_resid = rslt.schoenfeld_residuals
ii = np.flatnonzero(np.isfinite(s_resid).all(1))
s_resid = s_resid[ii, :]
v = np.r_[0.85154336, 0.72993748, 0.73758071, 0.78599333]
assert_allclose(np.abs(s_resid).mean(0), v)
示例15: _kernel_cumincidence
def _kernel_cumincidence(time, status, exog, kfunc, freq_weights,
dimred=True):
"""
Calculates cumulative incidence functions using kernels.
Parameters
----------
time : array-like
The observed time values
status : array-like
The status values. status == 0 indicates censoring,
status == 1, 2, ... are the events.
exog : array-like
Covariates such that censoring becomes independent of
outcome times conditioned on the covariate values.
kfunc : function
A kernel function
freq_weights : array-like
Optional frequency weights
dimred : boolean
If True, proportional hazards regression models are used to
reduce exog to two columns by predicting overall events and
censoring in two separate models. If False, exog is used
directly for calculating kernel weights without dimension
reduction.
"""
# Reorder so time is ascending
ii = np.argsort(time)
time = time[ii]
status = status[ii]
exog = exog[ii, :]
nobs = len(time)
# Convert the unique times to ranks (0, 1, 2, ...)
utime, rtime = np.unique(time, return_inverse=True)
# Last index where each unique time occurs.
ie = np.searchsorted(time, utime, side='right') - 1
ngrp = int(status.max())
# All-cause status
statusa = (status >= 1).astype(np.float64)
if freq_weights is not None:
freq_weights = freq_weights / freq_weights.sum()
ip = []
sp = [None] * nobs
n_risk = [None] * nobs
kd = [None] * nobs
for k in range(ngrp):
status0 = (status == k + 1).astype(np.float64)
# Dimension reduction step
if dimred:
sfe = PHReg(time, exog, status0).fit()
fitval_e = sfe.predict().predicted_values
sfc = PHReg(time, exog, 1 - status0).fit()
fitval_c = sfc.predict().predicted_values
exog2d = np.hstack((fitval_e[:, None], fitval_c[:, None]))
exog2d -= exog2d.mean(0)
exog2d /= exog2d.std(0)
else:
exog2d = exog
ip0 = 0
for i in range(nobs):
if k == 0:
kd1 = exog2d - exog2d[i, :]
kd1 = kfunc(kd1)
kd[i] = kd1
# Get the local all-causes survival function
if k == 0:
denom = np.cumsum(kd[i][::-1])[::-1]
num = kd[i] * statusa
rat = num / denom
tr = 1e-15
ii = np.flatnonzero((denom < tr) & (num < tr))
rat[ii] = 0
ratc = 1 - rat
ratc = np.clip(ratc, 1e-10, np.inf)
lrat = np.log(ratc)
prat = np.cumsum(lrat)[ie]
sf = np.exp(prat)
sp[i] = np.r_[1, sf[:-1]]
n_risk[i] = denom[ie]
# Number of cause-specific deaths at each unique time.
d0 = np.bincount(rtime, weights=status0*kd[i],
minlength=len(utime))
# The cumulative incidence function probabilities. Carry
# forward once the effective sample size drops below 1.
ip1 = np.cumsum(sp[i] * d0 / n_risk[i])
jj = len(ip1) - np.searchsorted(n_risk[i][::-1], 1)
if jj < len(ip1):
#.........这里部分代码省略.........