本文整理汇总了Python中patsy.dmatrices方法的典型用法代码示例。如果您正苦于以下问题：Python patsy.dmatrices方法的具体用法？Python patsy.dmatrices怎么用？Python patsy.dmatrices使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块patsy的用法示例。
在下文中一共展示了patsy.dmatrices方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __setstate__
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __setstate__(self, d):
    """Restore instance state from the dict ``d``.

    When ``d`` contains the ``"restore_design_info"`` marker, the design
    info is rebuilt by re-evaluating ``d['formula']`` with patsy against
    ``d['frame']`` (or, if that key is missing, ``d['orig_endog']`` joined
    with ``d['orig_exog']``). The marker is then removed from ``d`` and the
    remaining entries are copied into ``self.__dict__``.

    Raises
    ------
    NameError or PatsyError
        The error of the last attempted depth, when the formula cannot be
        evaluated at any of the candidate environment depths.
    """
    if "restore_design_info" in d:
        # NOTE: there may be a more performant way to do this
        import logging
        from patsy import dmatrices, PatsyError

        try:
            data = d['frame']
        except KeyError:
            data = d['orig_endog'].join(d['orig_exog'])

        last_error = None
        for depth in [2, 3, 1, 0, 4]:  # sequence is a guess where to likely find it
            try:
                _, design = dmatrices(d['formula'], data, eval_env=depth,
                                      return_type='dataframe')
                break
            except (NameError, PatsyError) as e:
                # Diagnostic only: goes to the debug log, not stdout.
                logging.getLogger(__name__).debug('not in depth %d', depth)
                # ``e`` is unbound once the except block ends, so keep our
                # own reference for the re-raise below.
                last_error = e
        else:
            # No depth worked: surface the most recent failure.
            raise last_error

        self.design_info = design.design_info
        del d["restore_design_info"]
    self.__dict__.update(d)
示例2: transform_with_patsy
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def transform_with_patsy(formula, data, *args, **kwargs):
    """Build design matrices from a patsy formula and wrap them like ``data``.

    Parameters
    ----------
    formula : str
        Patsy formula. When it contains ``'~'`` both sides are built and the
        first left-hand-side column becomes the target; otherwise only the
        predictor matrix is built.
    data : object
        Data handed to patsy. Its ``_constructor`` is used to build the
        returned object.
    *args, **kwargs
        Forwarded to ``patsy.dmatrices`` / ``patsy.dmatrix`` after
        ``formula`` and ``data``.

    Returns
    -------
    ``data._constructor(X, target=y)`` for a two-sided formula, otherwise
    ``data._constructor(X)``.

    Raises
    ------
    ImportError
        If patsy cannot be imported.
    ValueError
        If the left-hand side has more than one column.
    """
    try:
        # needs patsy v0.5.1 to support formula in Python 3.7
        # https://github.com/pydata/patsy/pull/131
        import patsy
    except ImportError:
        raise ImportError("'patsy' is required to transform with string formula")

    # ``data`` is passed positionally, ahead of ``*args``. Passing it as
    # ``data=data`` made any extra positional argument bind to ``data`` a
    # second time and fail with a TypeError.
    if '~' in formula:
        y, X = patsy.dmatrices(formula, data, *args,
                               return_type='dataframe', **kwargs)
        if len(y.shape) > 1 and y.shape[1] != 1:
            raise ValueError('target must be 1 dimensional')
        y = y.iloc[:, 0]
        return data._constructor(X, target=y)

    X = patsy.dmatrix(formula, data, *args,
                      return_type='dataframe', **kwargs)
    return data._constructor(X)
示例3: fit
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def fit(self, data, y=None):
    """Build design matrices from the formula and fit a cloned estimator.

    Parameters
    ----------
    data : dict-like (pandas dataframe)
        Input data holding the features and possibly the labels.
        Column names need to match the variables used in the formula.
    y : ignored
        Not read by this method.

    Returns
    -------
    self
    """
    env = EvalEnvironment.capture(self.eval_env, reference=1)
    model_formula = _drop_intercept(self.formula, self.add_intercept)
    lhs, rhs = dmatrices(
        model_formula, data, eval_env=env, NA_action=self.NA_action
    )

    # Remember the design metadata of both sides of the formula.
    self.design_y_ = lhs.design_info
    rhs_info = rhs.design_info
    self.design_X_ = rhs_info
    self.feature_names_ = rhs_info.column_names

    # Flatten the target to 1d so sklearn does not emit a warning.
    target = column_or_1d(lhs)
    self.estimator_ = clone(self.estimator).fit(rhs, target)
    return self
示例4: test_from_formula_vs_no_formula
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def test_from_formula_vs_no_formula():
    """Formula-based and matrix-based fits must yield the same test stats.

    The reference result comes from ``_MultivariateOLS.from_formula``. The
    same model is then fitted from the patsy design matrices, once as
    DataFrames and once as plain numpy arrays, and the ``'stat'`` table of
    each hypothesis is compared to the reference to 6 decimals.
    """
    formula = 'Histamine0 + Histamine1 + Histamine3 + Histamine5 ~ Drug * Depleted'
    mod = _MultivariateOLS.from_formula(formula, data)
    r0 = mod.fit(method='svd').mv_test()
    endog, exog = patsy.dmatrices(formula, data, return_type="dataframe")

    def check(name, L):
        # DataFrame input first, then numpy array input
        for y, x in ((endog, exog), (endog.values, exog.values)):
            r = _MultivariateOLS(y, x).fit(method='svd')
            r1 = r.mv_test(hypotheses=[[name, L, None]])
            assert_array_almost_equal(r1[name]['stat'].values,
                                      r0[name]['stat'].values, decimal=6)

    check('Intercept', np.array([[1, 0, 0, 0, 0, 0]]))
    check('Drug', np.array([[0, 1, 0, 0, 0, 0],
                            [0, 0, 1, 0, 0, 0],
                            ]))
示例5: get_design_matrices
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def get_design_matrices(df, dependent_variable, independent_variables, interactions=None):
    """Build the patsy model for the given variables and return its matrices.

    Parameters
    ----------
    df : data passed to ``dmatrices``.
    dependent_variable, independent_variables : forwarded to
        ``create_patsy_model``.
    interactions : list, optional
        Forwarded to ``create_patsy_model``; defaults to a fresh empty list
        on every call, so no list is shared between calls.

    Returns
    -------
    tuple
        ``(y, X)`` as returned by ``dmatrices`` with
        ``return_type='dataframe'``.
    """
    if interactions is None:
        interactions = []
    patsy_model = create_patsy_model(dependent_variable, independent_variables, interactions=interactions)
    y, X = dmatrices(patsy_model, df, return_type='dataframe')
    return (y, X)
示例6: run_linear_regression
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def run_linear_regression(df, patsy_model, dependent_variable, estimator, weights):
    """Fit an OLS model for ``patsy_model`` on ``df`` and package its statistics.

    ``dependent_variable``, ``estimator`` and ``weights`` are accepted but
    not read by this function.

    Returns
    -------
    The result of ``restructure_field_properties_dict`` called with the
    intercept statistics, the per-term statistics and the whole-model
    statistics.
    """
    endog, exog = dmatrices(patsy_model, df, return_type='dataframe')
    fitted = sm.OLS(endog, exog).fit()

    # Per-term statistics, keyed by output field name.
    per_term = {
        'p_value': fitted.pvalues.to_dict(),
        't_value': fitted.tvalues.to_dict(),
        'coefficient': fitted.params.to_dict(),
        'standard_error': fitted.bse.to_dict(),
        'conf_int': parse_confidence_intervals(fitted),
    }

    # The same statistics restricted to the intercept (None when absent).
    intercept_only = {
        field: values.get('Intercept') for field, values in per_term.items()
    }

    # NOTE(review): 'dof' is filled from ``nobs`` -- confirm this is intended.
    whole_model = {
        'aic': fitted.aic,
        'bic': fitted.bic,
        'dof': fitted.nobs,
        'r_squared': fitted.rsquared,
        'r_squared_adj': fitted.rsquared_adj,
        'f_test': fitted.fvalue,
    }

    return restructure_field_properties_dict(intercept_only, per_term, whole_model)
示例7: run_logistic_regression
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def run_logistic_regression(df, patsy_model, dependent_variable, estimator, weights):
    """Fit an MNLogit model for ``patsy_model`` on ``df`` and package its statistics.

    The model is fitted with ``method="nm"``, at most 100 iterations and
    convergence output disabled. ``dependent_variable``, ``estimator`` and
    ``weights`` are accepted but not read by this function.

    Returns
    -------
    The result of ``restructure_field_properties_dict`` called with the
    intercept statistics, the per-term statistics and the whole-model
    statistics.
    """
    endog, exog = dmatrices(patsy_model, df, return_type='dataframe')
    fitted = discrete_model.MNLogit(endog, exog).fit(maxiter=100, disp=False, method="nm")

    # Per-term statistics; ``[0]`` picks the entry labelled 0 of each result
    # table (presumably the first outcome column -- verify against caller).
    per_term = {
        'p_value': fitted.pvalues[0].to_dict(),
        't_value': fitted.tvalues[0].to_dict(),
        'coefficient': fitted.params[0].to_dict(),
        'standard_error': fitted.bse[0].to_dict(),
    }

    # The same statistics restricted to the intercept (None when absent).
    intercept_only = {
        field: values.get('Intercept') for field, values in per_term.items()
    }

    # NOTE(review): 'r_squared_adj' repeats ``prsquared`` -- confirm intended.
    whole_model = {
        'aic': fitted.aic,
        'bic': fitted.bic,
        'r_squared': fitted.prsquared,
        'r_squared_adj': fitted.prsquared,
        'llf': fitted.llf,
        'llnull': fitted.llnull,
        'llr_pvalue': fitted.llr_pvalue,
    }

    return restructure_field_properties_dict(intercept_only, per_term, whole_model)
示例8: recursive_feature_elimination
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def recursive_feature_elimination(df, dependent_variable, independent_variables, interaction_terms=None, model_limit=5):
    """Run recursive feature elimination on the first constructed model.

    A linear-kernel ``SVR`` is wrapped in ``RFE`` selecting 5 features with
    step 1. The resulting support mask and ranking are logged at INFO level.

    Parameters
    ----------
    df : data passed to ``construct_models`` and ``dmatrices``.
    dependent_variable, independent_variables : forwarded to
        ``construct_models``.
    interaction_terms : list, optional
        Forwarded to ``construct_models``; defaults to a fresh empty list.
    model_limit : int
        Accepted but not read by this function.

    Returns
    -------
    None
    """
    if interaction_terms is None:
        interaction_terms = []
    _, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    estimator = SVR(kernel='linear')
    # Pass the feature count by keyword: it is keyword-only in current
    # scikit-learn releases and accepted by keyword in older ones too.
    selector = RFE(estimator, n_features_to_select=5, step=1)
    selector = selector.fit(X, y)

    logger.info(selector.support_)
    logger.info(selector.ranking_)
    return None
示例9: f_regression
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def f_regression(df, dependent_variable, independent_variables, interaction_terms=None, model_limit=5):
    """Run scikit-learn's univariate F-test on the first constructed model.

    The two arrays returned by scikit-learn's ``f_regression`` are logged at
    INFO level.

    Parameters
    ----------
    df : data passed to ``construct_models`` and ``dmatrices``.
    dependent_variable, independent_variables : forwarded to
        ``construct_models``.
    interaction_terms : list, optional
        Forwarded to ``construct_models``; defaults to a fresh empty list.
    model_limit : int
        Accepted but not read by this function.

    Returns
    -------
    None
    """
    # This function has the same name as scikit-learn's ``f_regression``, so
    # a bare ``f_regression(X, y, center=True)`` would call this function
    # again and fail on the unknown ``center`` keyword. Import under an alias.
    from sklearn.feature_selection import f_regression as sklearn_f_regression

    if interaction_terms is None:
        interaction_terms = []
    _, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    f_test, r = sklearn_f_regression(X, y, center=True)
    logger.info(f_test)
    logger.info(r)
    return None
示例10: ordered_logit_processing
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def ordered_logit_processing(formula, data):
    """Process user input for an ordered logit model.

    Parameters
    ----------
    formula : str
        Patsy formula; ``" - 1"`` is appended before it is evaluated.
    data : data passed to ``dmatrices``.

    Returns
    -------
    start_params : pandas.DataFrame
        A ``"value"`` column indexed by a ``("type", "name")`` MultiIndex
        with one ``"beta"`` row per design column and one ``"cutoff"`` row
        per cutoff.
    y : numpy.ndarray
        First left-hand-side column cast to int.
    x : numpy.ndarray
        Right-hand-side design matrix.
    constr : list of dict
        A single constraint marking the cutoffs as increasing.
    """
    # extract data arrays
    y, x = dmatrices(formula + " - 1", data, return_type="dataframe")
    y = y[y.columns[0]]

    # extract dimensions
    num_choices = len(y.unique())
    beta_names = list(x.columns)
    num_betas = len(beta_names)
    num_cutoffs = num_choices - 1

    # set-up index for params_df
    names = beta_names + list(range(num_cutoffs))
    categories = ["beta"] * num_betas + ["cutoff"] * num_cutoffs
    index = pd.MultiIndex.from_tuples(
        list(zip(categories, names)), names=["type", "name"]
    )

    # make params_df
    # A private generator with the same seed gives the same start values as
    # seeding the global numpy RNG, without changing the caller's RNG state.
    rng = np.random.RandomState(5471)
    start_params = pd.DataFrame(index=index)
    start_params["value"] = np.hstack(
        [
            rng.uniform(low=-0.5, high=0.5, size=num_betas),
            np.arange(num_cutoffs) * 2,
        ]
    )

    # make constraints
    constr = [{"loc": "cutoff", "type": "increasing"}]

    return start_params, y.to_numpy().astype(int), x.to_numpy(), constr
示例11: gls_formula
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def gls_formula(data, xseq, **params):
    """
    Fit GLS using a formula

    Reads ``'enviroment'``, ``'formula'``, ``'method_args'``, ``'se'`` and,
    when ``'se'`` is truthy, ``'level'`` from ``params``. Returns a
    DataFrame with columns ``x`` and ``y``, plus ``se``, ``ymin`` and
    ``ymax`` when ``'se'`` is truthy.
    """
    env = params['enviroment']
    formula = params['formula']
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.GLS, sm.GLS.fit)

    results = smf.gls(formula, data, eval_env=env, **init_kwargs).fit(**fit_kwargs)

    # Predict over the requested x values
    predicted = pd.DataFrame({'x': xseq})
    predicted['y'] = results.predict(predicted)

    if not params['se']:
        return predicted

    # Prediction standard error and interval bounds
    _, predictors = dmatrices(formula, predicted, eval_env=env)
    alpha = 1 - params['level']
    prstd, iv_l, iv_u = wls_prediction_std(
        results, predictors, alpha=alpha)
    predicted['se'] = prstd
    predicted['ymin'] = iv_l
    predicted['ymax'] = iv_u
    return predicted
示例12: __init__
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __init__(self, formula, data):
    """Set up a dynamic linear regression model from a patsy formula.

    Parameters
    ----------
    formula : passed to ``dmatrices`` and stored as ``self.formula``.
    data : passed to ``dmatrices``; stored as ``self.data_original`` and its
        ``index`` is stored as ``self.index``.
    """
    # Initialize TSM object
    super(DynReg, self).__init__('DynReg')

    # Model settings
    self.model_name = "Dynamic Linear Regression"
    self.supported_methods = ["MLE", "PML", "Laplace", "M-H", "BBVI"]
    self.default_method = "MLE"
    self.multivariate_model = False
    self.max_lag = 0
    self._z_hide = 0  # Whether to cutoff variance latent variables from results

    # Raw inputs
    self.is_pandas = True  # This is compulsory for this model type
    self.data_original = data
    self.formula = formula
    self.index = data.index

    # Design matrices and the names derived from them
    y_design, x_design = dmatrices(formula, data)
    self.z_no = x_design.shape[1] + 1
    self.y_name = y_design.design_info.describe()
    self.data_name = self.y_name
    self.X_names = x_design.design_info.describe().split(" + ")

    # Keep plain arrays: a flat response vector and the regressor matrix
    self.y = np.array([y_design]).ravel()
    self.data = self.y
    self.X = np.array([x_design])[0]

    self._create_latent_variables()
示例13: __init__
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __init__(self, data, p, q, formula):
    """Set up an EGARCHMReg(p, q) model from a patsy formula.

    Parameters
    ----------
    data : passed to ``dmatrices``; stored as ``self.data_original`` and its
        ``index`` is stored as ``self.index``.
    p, q : stored as ``self.p`` / ``self.q``; their maximum becomes
        ``self.max_lag``.
    formula : passed to ``dmatrices`` and stored as ``self.formula``.
    """
    # Initialize TSM object
    super(EGARCHMReg, self).__init__('EGARCHMReg')

    # Model settings
    self.p = p
    self.q = q
    self.max_lag = max(self.p, self.q)
    self.model_name = "EGARCHMReg(" + str(self.p) + "," + str(self.q) + ")"
    self.supported_methods = ["MLE", "PML", "Laplace", "M-H", "BBVI"]
    self.default_method = "MLE"
    self.multivariate_model = False
    self.leverage = False
    self._z_hide = 0  # Whether to cutoff variance latent variables from results

    # Raw inputs
    self.is_pandas = True  # This is compulsory for this model type
    self.data_original = data
    self.formula = formula
    self.index = data.index

    # Design matrices and the names derived from them
    y_design, x_design = dmatrices(formula, data)
    # p + q + 2 base latent variables, plus two per regressor column
    self.z_no = self.p + self.q + 2 + x_design.shape[1]*2
    self.y_name = y_design.design_info.describe()
    self.data_name = self.y_name
    self.X_names = x_design.design_info.describe().split(" + ")

    # Keep plain arrays: a flat response vector and the regressor matrix
    self.y = np.array([y_design]).ravel()
    self.data = self.y
    self.data_length = len(self.data)
    self.X = np.array([x_design])[0]

    self.initial_values = np.zeros(self.z_no)
    self._create_latent_variables()
示例14: _chunk_boot_ols_coefs
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def _chunk_boot_ols_coefs(dat, formula, weights, seed):
    """
    Bootstrap one OLS coefficient estimate; meant for use in a
    parallelization context.

    ``dat`` is resampled with replacement to its full size using ``seed``,
    design matrices are built from ``formula``, and the output of ``_ols``
    is returned as a list.
    """
    # Random sample with replacement from all data
    resampled = dat.sample(frac=1, replace=True, random_state=seed)
    endog, exog = dmatrices(formula, resampled, eval_env=1, return_type="dataframe")
    # NOTE(review): ``weights`` is passed through unchanged while the rows
    # are resampled -- confirm the alignment is handled inside ``_ols``.
    coefs = _ols(
        exog,
        endog,
        robust=None,
        n_lags=1,
        cluster=None,
        all_stats=False,
        weights=weights,
    )
    return list(coefs)
示例15: _ols_group
# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def _ols_group(dat, formula, group_col, group, rank):
    """Compute OLS on one group's rows given a formula. Used by Lm2

    Rows where ``dat[group_col] == group`` are kept and re-indexed; when
    ``rank`` is truthy they are replaced by their ranks before the design
    matrices are built. The output of ``_ols`` is returned as a list.
    """
    subset = dat[dat[group_col] == group].reset_index(drop=True)
    if rank:
        subset = subset.rank()
    endog, exog = dmatrices(formula, subset, eval_env=1, return_type="dataframe")
    coefs = _ols(
        exog, endog, robust=None, n_lags=1, cluster=None, all_stats=False
    )
    return list(coefs)