本文整理汇总了Python中patsy.dmatrix方法的典型用法代码示例。如果您正苦于以下问题：Python patsy.dmatrix方法的具体用法？Python patsy.dmatrix怎么用？Python patsy.dmatrix使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块patsy的用法示例。
在下文中一共展示了patsy.dmatrix方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _transform
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def _transform(self, var, by):
    """Split ``var`` according to the combined levels of the ``by`` names.

    Each name in ``by`` is looked up among ``self._variables`` first and,
    when absent there, in the index of ``var``. The grouping columns are
    cast to strings, combined by patsy into an intercept-free interaction
    design matrix, and that matrix is handed to ``var.split``.
    """
    # Anything that is not a SimpleVariable triggers densification first.
    if not isinstance(var, SimpleVariable):
        self._densify_variables()

    group_names = listify(by)
    known = self._variables

    # A grouping name may be a regular variable or an entity stored in the
    # index of ``var``, so both places are consulted.
    columns = []
    for name in group_names:
        if name in known:
            columns.append(known[name].values)
        else:
            columns.append(var.index[name].reset_index(drop=True))

    grouping = pd.concat(columns, axis=1, sort=True)
    grouping.columns = group_names
    grouping = grouping.astype(str)

    # Let patsy build the splitting design matrix; ':' in the generated
    # column labels is replaced by '.'.
    design = dmatrix('0+' + ':'.join(group_names), data=grouping,
                     return_type='dataframe')
    design.columns = [label.replace(':', '.') for label in design.columns]
    return var.split(design)
示例2: __predict__
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def __predict__(df, model, variable, formula):
    """UNUSED FUNCTION

    New hidden predict method to shorten Monte Carlo estimation code. This is
    slower than statsmodels predict, but likely can be optimized. I would need
    to find a faster alternative to np.dot()...

    For this to work, I need to do the following:
        -need to have each ..._model() store a self.formula
        -switch all functions to sm.GLM or sm.GEE

    Parameters
    ----------
    df : data passed to ``patsy.dmatrix`` together with ``formula``
    model : fitted model exposing ``params`` (and ``resid`` for the
        continuous case)
    variable : str
        ``'binary'`` or ``'continuous'``
    formula : str
        patsy formula describing the design matrix

    Returns
    -------
    Random draws, one per row of the design matrix.

    Raises
    ------
    ValueError
        If ``variable`` is neither ``'binary'`` nor ``'continuous'``.
    """
    import patsy
    from zepid.calc import odds_to_probability

    xdata = patsy.dmatrix(formula, df)
    linear_pred = xdata.dot(model.params)  # TODO optimize this...
    n_obs = xdata.shape[0]

    if variable == 'binary':
        # exp(linear predictor) is treated as odds and converted to a probability
        return np.random.binomial(1, odds_to_probability(np.exp(linear_pred)),
                                  size=n_obs)
    if variable == 'continuous':
        # One draw per design-matrix row, same sizing as the binary branch
        # (the previous code referenced an undefined name here).
        return np.random.normal(loc=linear_pred, scale=np.std(model.resid),
                                size=n_obs)
    # TODO add optimization for multinomial (if applicable)
    raise ValueError('That option is not supported')
示例3: design_mat
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def design_mat(mod, numerical_covariates, batch_levels):
    """Build the design matrix from batch indicators plus covariates.

    Parameters
    ----------
    mod : pandas.DataFrame
        Model frame; must contain a ``batch`` column, which is expanded by
        patsy and then dropped.
    numerical_covariates : iterable of int or None
        Positions (into the columns of ``mod`` *after* ``batch`` has been
        dropped) of the numerical covariates. ``None`` is treated as "no
        numerical covariates".
    batch_levels : sequence
        Batch levels, interpolated into the formula to fix the column order.

    Returns
    -------
    pandas.DataFrame
        Batch columns, followed by the remaining (non-numerical) columns of
        ``mod``, followed by the numerical covariate columns.

    Progress information is written to ``sys.stderr``.
    """
    # require levels to make sure they are in the same order as we use in the
    # rest of the script.
    design = patsy.dmatrix("~ 0 + C(batch, levels=%s)" % str(batch_levels),
                           mod, return_type="dataframe")
    mod = mod.drop(["batch"], axis=1)

    # Remember whether covariates were supplied *before* normalising to a
    # list; list(None) would raise TypeError.
    has_numerical = numerical_covariates is not None
    numerical_covariates = list(numerical_covariates) if has_numerical else []

    sys.stderr.write("found %i batches\n" % design.shape[1])
    other_cols = [c for i, c in enumerate(mod.columns)
                  if i not in numerical_covariates]
    factor_matrix = mod[other_cols]
    design = pd.concat((design, factor_matrix), axis=1)

    if has_numerical:
        sys.stderr.write("found %i numerical covariates...\n"
                         % len(numerical_covariates))
        for nC in numerical_covariates:
            cname = mod.columns[nC]
            sys.stderr.write("\t{0}\n".format(cname))
            design[cname] = mod[cname]

    sys.stderr.write("found %i categorical variables:" % len(other_cols))
    # str() keeps the report working when column labels are not strings.
    sys.stderr.write("\t" + ", ".join(str(c) for c in other_cols) + '\n')
    return design
示例4: transform_with_patsy
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def transform_with_patsy(formula, data, *args, **kwargs):
    """Apply a patsy formula to ``data`` and rebuild it via ``data._constructor``.

    A formula containing ``~`` is evaluated with ``patsy.dmatrices``; the
    first column of the left-hand side becomes ``target``. Otherwise only the
    design matrix from ``patsy.dmatrix`` is used. Extra positional and
    keyword arguments are forwarded to patsy.

    Raises
    ------
    ImportError
        If patsy cannot be imported.
    ValueError
        If the left-hand side has more than one column.
    """
    try:
        # needs patsy v0.5.1 to support formula in Python 3.7
        # https://github.com/pydata/patsy/pull/131
        import patsy
    except ImportError:
        raise ImportError("'patsy' is required to transform with string formula")

    # Right-hand-side-only formula: no target to extract.
    if '~' not in formula:
        features = patsy.dmatrix(formula, *args, data=data,
                                 return_type='dataframe', **kwargs)
        return data._constructor(features)

    target, features = patsy.dmatrices(formula, *args, data=data,
                                       return_type='dataframe', **kwargs)
    is_2d = len(target.shape) > 1
    if is_2d and target.shape[1] != 1:
        raise ValueError('target must be 1 dimensional')
    return data._constructor(features, target=target.iloc[:, 0])
示例5: transform
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def transform(self, data):
    """Build the design matrix for ``data`` and run the estimator's transform.

    The stored design ``self.design_X_`` is applied to ``data``; the result
    is kept as a dataframe when ``self.return_type == 'dataframe'`` and is
    converted with ``np.array`` otherwise.

    Parameters
    ----------
    data : dict-like (pandas dataframe)
        Input data. Column names need to match variables in formula.

    Returns
    -------
    Whatever ``self.estimator_.transform`` returns for the design matrix.
    """
    if self.return_type != 'dataframe':
        return self.estimator_.transform(np.array(dmatrix(self.design_X_, data)))
    return self.estimator_.transform(
        dmatrix(self.design_X_, data, return_type='dataframe'))
示例6: test_harmonic_transform
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def test_harmonic_transform():
    """Compare ``harm(x, 1)`` with directly computed cosine/sine columns."""
    # No data is passed to dmatrix, so patsy has to find ``x`` in this
    # function's namespace; the local must keep this name.
    x = np.arange(735688, 735688 + 100, 1)
    design = patsy.dmatrix('0 + harm(x, 1)')

    # Expected layout: first column cosine, second column sine.
    phase = 2 * np.pi / 365.25 * x
    truth = np.column_stack((np.cos(phase), np.sin(phase)))
    np.testing.assert_equal(np.asarray(design), truth)
示例7: setup
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def setup(self, df, **config):
    """Build the design matrix for the input dataset.

    Args:
        df (pandas.DataFrame): Pandas dataframe containing dataset
            attributes (e.g., dates, image ID, path/row, metadata, etc.)
        config (dict): YATSM configuration dictionary from user, including
            'dataset' and 'YATSM' sub-configurations; the formula is read
            from ``config['YATSM']['design_matrix']``

    Returns:
        The design matrix produced by ``patsy.dmatrix`` for ``df``.
    """
    return patsy.dmatrix(config['YATSM']['design_matrix'], data=df)
示例8: test_predict_formula
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def test_predict_formula(self):
    """Predictions must agree whether made from the model, results or a
    patsy-built design matrix."""
    from patsy import dmatrix

    n = 100
    np.random.seed(34234)
    # The order of the random draws is part of the fixture.
    time = 50 * np.random.uniform(size=n)
    status = np.random.randint(0, 2, n).astype(np.float64)
    exog = np.random.uniform(1, 2, size=(n, 2))
    df = pd.DataFrame({
        "time": time,
        "status": status,
        "exog1": exog[:, 0],
        "exog2": exog[:, 1],
    })

    # Works with "0 +" on RHS but issues warning
    fml = "time ~ exog1 + np.log(exog2) + exog1*exog2"
    model1 = PHReg.from_formula(fml, df, status=status)
    result1 = model1.fit()

    dfp = dmatrix(model1.data.design_info.builder, df)

    pr1 = result1.predict()
    pr2 = result1.predict(exog=df)
    pr3 = model1.predict(result1.params, exog=dfp)  # No standard errors
    pr4 = model1.predict(result1.params, cov_params=result1.cov_params(), exog=dfp)

    # Every pair of predictions must share the same predicted values.
    with_values = (pr1, pr2, pr3, pr4)
    for pos, later in enumerate(with_values):
        for earlier in with_values[:pos]:
            assert_allclose(later.predicted_values, earlier.predicted_values)

    # pr3 is left out here: it was computed without a covariance matrix.
    with_errors = (pr1, pr2, pr4)
    for pos, later in enumerate(with_errors):
        for earlier in with_errors[:pos]:
            assert_allclose(later.standard_errors, earlier.standard_errors)
示例9: test_framing_example
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def test_framing_example():
    """Fit the framing mediation example parametrically and by bootstrap and
    compare both summaries with the stored reference tables."""
    here = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome model: GLM with a probit link.
    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix("emo + treat + age + educ + gender + income", data,
                                 return_type='dataframe')
    probit = sm.families.links.probit
    outcome_model = sm.GLM(outcome, outcome_exog,
                           family=sm.families.Binomial(link=probit()))

    # Mediator model: OLS.
    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix("treat + age + educ + gender + income", data,
                                  return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    # Column positions of the treatment (in both designs) and the mediator.
    outcome_cols = outcome_exog.columns.tolist()
    mediator_cols = mediator_exog.columns.tolist()
    tx_pos = [outcome_cols.index("treat"), mediator_cols.index("treat")]
    med_pos = outcome_cols.index("emo")

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    outcome_fit_kwargs={'atol': 1e-11})

    np.random.seed(4231)
    para_rslt = med.fit(method='parametric', n_rep=100)
    assert_allclose(np.asarray(para_rslt.summary() - framing_para_4231), 0, atol=1e-6)

    np.random.seed(4231)
    boot_rslt = med.fit(method='boot', n_rep=100)
    assert_allclose(np.asarray(boot_rslt.summary() - framing_boot_4231), 0, atol=1e-6)
示例10: test_framing_example_moderator
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def test_framing_example_moderator():
    """Smoke test: mediation with a moderator given by column positions."""
    # moderation without formulas, generally not useful but test anyway
    here = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome model: GLM with a probit link.
    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix("emo + treat + age + educ + gender + income", data,
                                 return_type='dataframe')
    probit = sm.families.links.probit
    outcome_model = sm.GLM(outcome, outcome_exog,
                           family=sm.families.Binomial(link=probit()))

    # Mediator model: OLS.
    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix("treat + age + educ + gender + income", data,
                                  return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    outcome_cols = outcome_exog.columns.tolist()
    mediator_cols = mediator_exog.columns.tolist()
    tx_pos = [outcome_cols.index("treat"), mediator_cols.index("treat")]
    med_pos = outcome_cols.index("emo")

    # The moderator key is the pair of "age" positions (outcome, mediator).
    age_pos = (outcome_cols.index("age"), mediator_cols.index("age"))
    moderators = {age_pos: 20}
    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    moderators=moderators)

    # Just a smoke test
    np.random.seed(4231)
    med_rslt = med.fit(method='parametric', n_rep=100)
示例11: test_patsy_577
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def test_patsy_577():
    """``data._is_using_patsy`` must be truthy for patsy-built endog/exog."""
    from patsy import dmatrix

    df = pandas.DataFrame(np.random.random((10, 2)), columns=["var1", "var2"])

    # endog alone, with no exog supplied
    endog = dmatrix("var1 - 1", df)
    np.testing.assert_(data._is_using_patsy(endog, None))

    # endog together with a patsy-built exog
    exog = dmatrix("var2 - 1", df)
    np.testing.assert_(data._is_using_patsy(endog, exog))
示例12: _process_kwds
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def _process_kwds(self, kwds, ix):
    """Return a copy of ``kwds`` with every ``PatsyFormula`` value evaluated.

    Each formula is evaluated against ``self.data``, restricted to the rows
    selected by ``ix`` and, when it yields a single column, flattened to one
    dimension. Other values are passed through untouched; the input mapping
    is not modified.
    """
    resolved = kwds.copy()
    for key, value in kwds.items():
        if not isinstance(value, PatsyFormula):
            continue
        frame = patsy.dmatrix(value.formula, self.data, return_type="dataframe")
        rows = np.asarray(frame)[ix, :]
        # A one-column matrix is collapsed to a plain vector.
        resolved[key] = rows[:, 0] if rows.shape[1] == 1 else rows
    return resolved
示例13: fit
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def fit(self, X, y=None):
    """Evaluate ``self.formula`` on ``X`` and store the resulting design info.

    ``y`` is accepted but not used. Returns ``self``.
    """
    design = dmatrix(self.formula, X, NA_action="raise",
                     return_type=self.return_type)

    # NA_action='raise' should already guarantee that no rows were dropped;
    # the row counts are compared anyway as a safety net.
    rows_out = np.array(design).shape[0]
    rows_in = np.array(X).shape[0]
    assert rows_out == rows_in

    self.design_info_ = design.design_info
    return self
示例14: diff_test_helper
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def diff_test_helper(data,
                     fullModelFormulaStr="~cr(time, df=3)",
                     reducedModelFormulaStr="~1",
                     ):
    """Compare a full and a reduced negative-binomial GLM on ``data``.

    Steps:
      1. Build design matrices for the full and reduced formulas.
      2. Fit a Poisson GLM of ``data['expression']`` on the full design.
      3. Regress ``((y - mu)**2 - y) / mu`` on ``mu`` (no intercept) and use
         the coefficient as the negative-binomial ``alpha``.
      4. Fit NB2 GLMs for both designs and compare them with ``lrt``
         (presumably a likelihood-ratio test; defined elsewhere).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``expression`` column plus whatever the formulas
        reference.
    fullModelFormulaStr, reducedModelFormulaStr : str
        Right-hand-side patsy formulas of the two models.

    Returns
    -------
    tuple
        ``('ok', 'NB2', pval)`` on success, ``('fail', 'NB2', 1)`` when
        either NB2 fit raises.
    """
    # Design matrices for the full and the reduced (null) model
    transformed_x = dmatrix(fullModelFormulaStr, data, return_type='dataframe')
    transformed_x_null = dmatrix(reducedModelFormulaStr, data, return_type='dataframe')
    expression = data['expression']

    poisson_training_results = sm.GLM(expression, transformed_x,
                                      family=sm.families.Poisson()).fit()
    poisson_df = pd.DataFrame({'mu': poisson_training_results.mu,
                               'expression': expression})
    # Column-wise arithmetic gives the same values as a row-by-row apply.
    observed = poisson_df['expression']
    fitted = poisson_df['mu']
    poisson_df['AUX_OLS_DEP'] = ((observed - fitted) ** 2 - observed) / fitted

    ols_expr = """AUX_OLS_DEP ~ mu - 1"""
    aux_olsr_results = smf.ols(ols_expr, poisson_df).fit()
    # .iloc: params is indexed by term name, so take the first entry by position.
    nb2_family = sm.families.NegativeBinomial(alpha=aux_olsr_results.params.iloc[0])

    try:
        nb2_full = sm.GLM(expression, transformed_x, family=nb2_family).fit()
        nb2_null = sm.GLM(expression, transformed_x_null, family=nb2_family).fit()
    except Exception:
        # Best effort: any fitting error is reported as a failed test, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return ('fail', 'NB2', 1)

    pval = lrt(nb2_full, nb2_null)
    return ('ok', 'NB2', pval)
示例15: _make_rhs_matrix
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrix [as 别名]
def _make_rhs_matrix(self, X):
    """Build the design matrix for the right-hand side of ``self.formula``.

    The formula is split on ``~``: with one ``~`` the part after it is used,
    with none the whole formula is taken as the right-hand side.

    Parameters
    ----------
    X : data passed to ``pt.dmatrix`` to evaluate the formula against.

    Returns
    -------
    The matrix returned by ``pt.dmatrix``.

    Raises
    ------
    ValueError
        If the formula contains more than one ``~``.
    """
    formula_parts = self.formula.split('~')
    if len(formula_parts) > 2:
        raise ValueError(
            f"Cannot parse model formula {self.formula} to determine right hand side!")
    # str.split always yields at least one piece, so the last piece is the
    # right-hand side both with and without a '~'.
    rhs_formula = formula_parts[-1].strip()
    return pt.dmatrix(rhs_formula, X)