当前位置: 首页>>代码示例>>Python>>正文


Python patsy.dmatrices方法代码示例

本文整理汇总了Python中patsy.dmatrices方法的典型用法代码示例。如果您正苦于以下问题：Python patsy.dmatrices方法的具体用法？Python patsy.dmatrices怎么用？Python patsy.dmatrices使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块patsy的用法示例。


在下文中一共展示了patsy.dmatrices方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __setstate__

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __setstate__(self, d):
        """Restore the instance from its pickled state dict ``d``.

        When ``d`` carries the ``"restore_design_info"`` marker, the design
        info is rebuilt by re-evaluating ``d['formula']`` against the stored
        data. The marker is then removed and the remaining entries of ``d``
        are copied into ``self.__dict__``.

        Raises
        ------
        NameError or PatsyError
            The error from the last depth tried, when the formula could not
            be evaluated at any of the attempted ``eval_env`` depths.
        """
        if "restore_design_info" not in d:
            self.__dict__.update(d)
            return

        # NOTE: there may be a more performant way to do this
        from patsy import dmatrices, PatsyError

        try:
            frame = d['frame']
        except KeyError:
            # No combined frame was stored; rebuild it from its two halves.
            frame = d['orig_endog'].join(d['orig_exog'])

        failures = []
        # The order of depths is a guess at where the formula's names are
        # most likely to be found. dmatrices is called directly from this
        # method on purpose: ``eval_env`` is given as an integer depth.
        for depth in (2, 3, 1, 0, 4):
            try:
                _, design = dmatrices(d['formula'], frame, eval_env=depth,
                                      return_type='dataframe')
            except (NameError, PatsyError) as err:
                print('not in depth %d' % depth)
                # Keep our own reference; ``err`` is unbound once the
                # except block is left.
                failures.append(err)
            else:
                break
        else:
            # Every depth failed: surface the most recent error.
            raise failures[-1]

        self.design_info = design.design_info
        del d["restore_design_info"]
        self.__dict__.update(d)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:27,代码来源:data.py

示例2: transform_with_patsy

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def transform_with_patsy(formula, data, *args, **kwargs):
    """Build a new frame from ``data`` using a patsy ``formula``.

    Parameters
    ----------
    formula : str
        patsy formula. When it contains ``'~'`` both sides are built with
        ``patsy.dmatrices``; otherwise only a design matrix is built with
        ``patsy.dmatrix``.
    data : object
        Data handed to patsy. Its ``_constructor`` is used to build the
        returned object.
    *args, **kwargs
        Extra arguments forwarded to patsy *after* ``formula`` and ``data``.
        ``return_type`` is always ``'dataframe'`` and must not be supplied.

    Returns
    -------
    object
        ``data._constructor(X, target=y)`` for a two-sided formula,
        ``data._constructor(X)`` otherwise.

    Raises
    ------
    ImportError
        If patsy is not installed.
    ValueError
        If the left-hand side of the formula yields more than one column.
    """
    try:
        # needs patsy v0.5.1 to support formula in Python 3.7
        # https://github.com/pydata/patsy/pull/131
        import patsy
    except ImportError:
        raise ImportError("'patsy' is required to transform with string formula")

    # ``data`` is passed positionally ahead of ``*args``. Passing it as
    # ``data=data`` made the first extra positional argument bind to ``data``
    # a second time, so any non-empty ``args`` raised TypeError.
    if '~' in formula:
        y, X = patsy.dmatrices(formula, data, *args,
                               return_type='dataframe', **kwargs)
        if len(y.shape) > 1 and y.shape[1] != 1:
            raise ValueError('target must be 1 dimensional')
        y = y.iloc[:, 0]
        return data._constructor(X, target=y)
    else:
        X = patsy.dmatrix(formula, data, *args,
                          return_type='dataframe', **kwargs)
        return data._constructor(X)
开发者ID:pandas-ml,项目名称:pandas-ml,代码行数:21,代码来源:patsy_wraps.py

示例3: fit

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def fit(self, data, y=None):
        """Build design matrices from the formula and fit the estimator.

        Parameters
        ----------
        data : dict-like (pandas dataframe)
            Input data. Contains features and possible labels.
            Column names need to match variables in formula.
        y : object, optional
            Not read by this method.

        Returns
        -------
        self
            With ``design_y_``, ``design_X_``, ``feature_names_`` and
            ``estimator_`` set.
        """
        # NOTE(review): ``reference=1`` looks frame-relative, so this call
        # presumably has to stay directly inside ``fit`` - confirm before
        # moving it into a helper.
        env = EvalEnvironment.capture(self.eval_env, reference=1)
        model_formula = _drop_intercept(self.formula, self.add_intercept)
        y_design, X_design = dmatrices(model_formula, data, eval_env=env,
                                       NA_action=self.NA_action)

        X_info = X_design.design_info
        self.design_y_ = y_design.design_info
        self.design_X_ = X_info
        self.feature_names_ = X_info.column_names

        # convert to 1d vector so we don't get a warning
        # from sklearn.
        target = column_or_1d(y_design)
        self.estimator_ = clone(self.estimator).fit(X_design, target)
        return self
开发者ID:amueller,项目名称:patsylearn,代码行数:24,代码来源:patsy_adaptor.py

示例4: test_from_formula_vs_no_formula

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def test_from_formula_vs_no_formula():
    """Check the formula API against explicit design matrices.

    The model is fitted once through ``from_formula`` and its default
    ``mv_test`` results are used as the reference. The same model is then
    fitted from the patsy-built matrices, both as DataFrames and as plain
    NumPy arrays, and the ``'Intercept'`` and ``'Drug'`` hypothesis
    statistics must match the reference to 6 decimals.
    """
    formula = ('Histamine0 + Histamine1 + Histamine3 + Histamine5'
               ' ~ Drug * Depleted')
    r0 = _MultivariateOLS.from_formula(formula, data).fit(method='svd').mv_test()
    endog, exog = patsy.dmatrices(formula, data, return_type="dataframe")

    # Contrast matrices selecting the coefficient rows of each hypothesis.
    hypotheses = [
        ('Intercept', np.array([[1, 0, 0, 0, 0, 0]])),
        ('Drug', np.array([[0, 1, 0, 0, 0, 0],
                           [0, 0, 1, 0, 0, 0],
                           ])),
    ]
    # DataFrame input first, then NumPy array input.
    inputs = [(endog, exog), (endog.values, exog.values)]

    for name, L in hypotheses:
        for y, x in inputs:
            r = _MultivariateOLS(y, x).fit(method='svd')
            r1 = r.mv_test(hypotheses=[[name, L, None]])
            assert_array_almost_equal(r1[name]['stat'].values,
                                      r0[name]['stat'].values, decimal=6)
开发者ID:birforce,项目名称:vnpy_crypto,代码行数:36,代码来源:test_multivariate_ols.py

示例5: get_design_matrices

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def get_design_matrices(df, dependent_variable, independent_variables, interactions=None):
    """Build the patsy design matrices for a regression on ``df``.

    Parameters
    ----------
    df : object
        Data passed to ``dmatrices``.
    dependent_variable, independent_variables
        Forwarded to ``create_patsy_model`` to build the formula.
    interactions : list, optional
        Interaction terms forwarded to ``create_patsy_model``. Defaults to a
        fresh empty list on every call.

    Returns
    -------
    tuple
        ``(y, X)`` as returned by ``dmatrices`` with
        ``return_type='dataframe'``.
    """
    # A ``[]`` default would be a single list shared by every call and handed
    # to the callee; create a new one per call instead.
    if interactions is None:
        interactions = []
    patsy_model = create_patsy_model(dependent_variable, independent_variables, interactions=interactions)
    y, X = dmatrices(patsy_model, df, return_type='dataframe')
    return (y, X)
开发者ID:MacroConnections,项目名称:DIVE-backend,代码行数:6,代码来源:utilities.py

示例6: run_linear_regression

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def run_linear_regression(df, patsy_model, dependent_variable, estimator, weights):
    """Fit an OLS model for ``patsy_model`` on ``df`` and package its statistics.

    ``dependent_variable``, ``estimator`` and ``weights`` are accepted but not
    read by this function.

    Returns whatever ``restructure_field_properties_dict`` produces from the
    intercept statistics, the per-field statistics and the whole-model
    statistics.
    """
    y, X = dmatrices(patsy_model, df, return_type='dataframe')
    fitted = sm.OLS(y, X).fit()

    # One mapping per statistic, keyed by design-matrix column.
    per_field = {
        'p_value': fitted.pvalues.to_dict(),
        't_value': fitted.tvalues.to_dict(),
        'coefficient': fitted.params.to_dict(),
        'standard_error': fitted.bse.to_dict(),
        'conf_int': parse_confidence_intervals(fitted),
    }

    # The same statistics, restricted to the intercept term.
    intercept_stats = {
        stat: by_column.get('Intercept')
        for stat, by_column in per_field.items()
    }

    overall = {
        'aic': fitted.aic,
        'bic': fitted.bic,
        'dof': fitted.nobs,
        'r_squared': fitted.rsquared,
        'r_squared_adj': fitted.rsquared_adj,
        'f_test': fitted.fvalue,
        # 'resid': model_result.resid.tolist()
    }

    return restructure_field_properties_dict(intercept_stats, per_field, overall)
开发者ID:MacroConnections,项目名称:DIVE-backend,代码行数:41,代码来源:pipelines.py

示例7: run_logistic_regression

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def run_logistic_regression(df, patsy_model, dependent_variable, estimator, weights):
    """Fit an MNLogit model for ``patsy_model`` on ``df`` and package its statistics.

    ``dependent_variable``, ``estimator`` and ``weights`` are accepted but not
    read by this function. Per-field statistics are taken from column ``0``
    of the fitted result's tables.

    Returns whatever ``restructure_field_properties_dict`` produces from the
    intercept statistics, the per-field statistics and the whole-model
    statistics.
    """
    y, X = dmatrices(patsy_model, df, return_type='dataframe')

    model = discrete_model.MNLogit(y, X)
    fitted = model.fit(maxiter=100, disp=False, method="nm")

    # One mapping per statistic, keyed by design-matrix column.
    per_field = {
        'p_value': fitted.pvalues[0].to_dict(),
        't_value': fitted.tvalues[0].to_dict(),
        'coefficient': fitted.params[0].to_dict(),
        'standard_error': fitted.bse[0].to_dict(),
    }

    # The same statistics, restricted to the intercept term.
    intercept_stats = {
        stat: by_column.get('Intercept')
        for stat, by_column in per_field.items()
    }

    # Both r-squared entries report the pseudo r-squared.
    overall = {
        'aic': fitted.aic,
        'bic': fitted.bic,
        'r_squared': fitted.prsquared,
        'r_squared_adj': fitted.prsquared,
        'llf': fitted.llf,
        'llnull': fitted.llnull,
        'llr_pvalue': fitted.llr_pvalue
        # 'f_test': model_result.f_test
    }

    return restructure_field_properties_dict(intercept_stats, per_field, overall)

示例8: recursive_feature_elimination

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def recursive_feature_elimination(df, dependent_variable, independent_variables, interaction_terms=None, model_limit=5):
    """Run recursive feature elimination on the all-variables model and log the result.

    Parameters
    ----------
    df : object
        Data passed to ``construct_models`` and ``dmatrices``.
    dependent_variable, independent_variables
        Forwarded to ``construct_models``.
    interaction_terms : list, optional
        Forwarded to ``construct_models``. Defaults to a fresh empty list on
        every call.
    model_limit : int, optional
        Accepted but not read by this function.

    Returns
    -------
    None
        The selector's ``support_`` and ``ranking_`` are only logged.
    """
    # Avoid a mutable default that would be shared between calls.
    if interaction_terms is None:
        interaction_terms = []

    considered_independent_variables_per_model, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    estimator = SVR(kernel='linear')
    # ``n_features_to_select`` must be given by keyword: it is keyword-only
    # in current scikit-learn, so the positional ``5`` raises TypeError there.
    selector = RFE(estimator, n_features_to_select=5, step=1)
    selector = selector.fit(X, y)
    logger.info(selector.support_)
    logger.info(selector.ranking_)
    return
开发者ID:MacroConnections,项目名称:DIVE-backend,代码行数:13,代码来源:model_recommendation.py

示例9: f_regression

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def f_regression(df, dependent_variable, independent_variables, interaction_terms=None, model_limit=5):
    """Run univariate F-tests on the all-variables model and log the result.

    Parameters
    ----------
    df : object
        Data passed to ``construct_models`` and ``dmatrices``.
    dependent_variable, independent_variables
        Forwarded to ``construct_models``.
    interaction_terms : list, optional
        Forwarded to ``construct_models``. Defaults to a fresh empty list on
        every call.
    model_limit : int, optional
        Accepted but not read by this function.

    Returns
    -------
    None
        The two arrays returned by the F-test are only logged.
    """
    # This function has the same name as scikit-learn's ``f_regression``, so
    # the bare name inside the body resolved to this function itself and the
    # call below failed with TypeError (unexpected keyword ``center``).
    # Import the library routine under an unambiguous alias.
    from sklearn.feature_selection import f_regression as sklearn_f_regression

    # Avoid a mutable default that would be shared between calls.
    if interaction_terms is None:
        interaction_terms = []

    considered_independent_variables_per_model, patsy_models = \
        construct_models(df, dependent_variable, independent_variables, interaction_terms, table_layout=MCT.ALL_VARIABLES.value)
    y, X = dmatrices(patsy_models[0], df, return_type='dataframe')

    f_test, r = sklearn_f_regression(X, y, center=True)
    logger.info(f_test)
    logger.info(r)
    return

示例10: ordered_logit_processing

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def ordered_logit_processing(formula, data):
    """Turn a formula and data into the inputs of an ordered logit model.

    Returns a 4-tuple: the start-parameter DataFrame (indexed by
    ``("type", "name")`` with a ``"value"`` column), the outcome as an integer
    NumPy array, the regressors as a NumPy array, and the list of constraints.
    """
    # Design matrices; " - 1" is appended to the formula before evaluation.
    y_frame, x = dmatrices(formula + " - 1", data, return_type="dataframe")
    y = y_frame[y_frame.columns[0]]

    # Dimensions: one cutoff fewer than there are distinct outcome values.
    beta_names = list(x.columns)
    num_betas = len(beta_names)
    num_cutoffs = len(y.unique()) - 1

    # Index of the parameter frame: betas by column name, cutoffs by position.
    categories = ["beta"] * num_betas + ["cutoff"] * num_cutoffs
    names = beta_names + list(range(num_cutoffs))
    index = pd.MultiIndex.from_tuples(
        list(zip(categories, names)), names=["type", "name"]
    )

    # Start values: seeded uniform draws for the betas, evenly spaced cutoffs.
    np.random.seed(5471)
    beta_start = np.random.uniform(low=-0.5, high=0.5, size=len(x.columns))
    cutoff_start = np.arange(num_cutoffs) * 2
    start_params = pd.DataFrame(
        {"value": np.concatenate([beta_start, cutoff_start])}, index=index
    )

    # Constraint placed on the "cutoff" parameters.
    constr = [{"loc": "cutoff", "type": "increasing"}]

    return start_params, y.to_numpy().astype(int), x.to_numpy(), constr
开发者ID:OpenSourceEconomics,项目名称:estimagic,代码行数:33,代码来源:ordered_logit.py

示例11: gls_formula

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def gls_formula(data, xseq, **params):
    """
    Fit GLS using a formula

    The model is fitted on ``data`` and then predicted over ``xseq``. The
    returned DataFrame has columns ``x`` and ``y``, plus ``se``, ``ymin`` and
    ``ymax`` when ``params['se']`` is truthy.
    """
    formula = params['formula']
    eval_env = params['enviroment']
    init_kwargs, fit_kwargs = separate_method_kwargs(
        params['method_args'], sm.GLS, sm.GLS.fit)

    fitted = smf.gls(
        formula, data, eval_env=eval_env, **init_kwargs
    ).fit(**fit_kwargs)

    # Predictions over the requested x sequence.
    out = pd.DataFrame({'x': xseq})
    out['y'] = fitted.predict(out)

    if not params['se']:
        return out

    # Rebuild the predictor matrix for the prediction frame and attach the
    # prediction standard error and interval bounds.
    _, predictors = dmatrices(formula, out, eval_env=eval_env)
    prstd, iv_l, iv_u = wls_prediction_std(
        fitted, predictors, alpha=1 - params['level'])
    out['se'] = prstd
    out['ymin'] = iv_l
    out['ymax'] = iv_u
    return out
开发者ID:has2k1,项目名称:plotnine,代码行数:29,代码来源:smoothers.py

示例12: __init__

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __init__(self, formula, data):
        """Set up a dynamic linear regression model from a patsy formula.

        ``formula`` is evaluated against ``data`` with ``dmatrices``. The
        response and regressors are stored as plain NumPy arrays in
        ``self.y`` / ``self.data`` and ``self.X``, and ``data.index`` is kept
        as ``self.index``.
        """
        # Initialize TSM object
        super(DynReg, self).__init__('DynReg')

        # Model description and estimation settings
        self.model_name = "Dynamic Linear Regression"
        self.supported_methods = ["MLE", "PML", "Laplace", "M-H", "BBVI"]
        self.default_method = "MLE"
        self.multivariate_model = False
        self.max_lag = 0
        self._z_hide = 0  # Whether to cutoff variance latent variables from results

        # Keep the raw inputs
        self.is_pandas = True  # This is compulsory for this model type
        self.data_original = data
        self.formula = formula

        # Build the design matrices and derive names and sizes from them
        y_design, X_design = dmatrices(formula, data)
        self.z_no = X_design.shape[1] + 1
        self.y_name = y_design.design_info.describe()
        self.data_name = self.y_name
        self.X_names = X_design.design_info.describe().split(" + ")

        # Store response (flattened) and regressors as NumPy arrays
        self.y = np.array([y_design]).ravel()
        self.data = self.y
        self.X = np.array([X_design])[0]
        self.index = data.index

        self._create_latent_variables()
开发者ID:RJT1990,项目名称:pyflux,代码行数:30,代码来源:dynlin.py

示例13: __init__

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def __init__(self, data, p, q, formula):
        """Set up an EGARCHMReg(p, q) model from a patsy formula.

        ``formula`` is evaluated against ``data`` with ``dmatrices``. The
        response and regressors are stored as plain NumPy arrays in
        ``self.y`` / ``self.data`` and ``self.X``, and ``data.index`` is kept
        as ``self.index``. ``self.z_no`` is ``p + q + 2`` plus twice the
        number of regressor columns.
        """
        # Initialize TSM object
        super(EGARCHMReg, self).__init__('EGARCHMReg')

        # Orders and the sizes derived from them
        self.p = p
        self.q = q
        self.max_lag = max(self.p, self.q)
        self.z_no = self.p + self.q + 2

        # Model description and estimation settings
        self.model_name = "EGARCHMReg(" + str(self.p) + "," + str(self.q) + ")"
        self.supported_methods = ["MLE", "PML", "Laplace", "M-H", "BBVI"]
        self.default_method = "MLE"
        self.multivariate_model = False
        self.leverage = False
        self._z_hide = 0  # Whether to cutoff variance latent variables from results

        # Keep the raw inputs
        self.is_pandas = True  # This is compulsory for this model type
        self.data_original = data
        self.formula = formula

        # Build the design matrices and derive names and sizes from them
        y_design, X_design = dmatrices(formula, data)
        self.z_no += X_design.shape[1] * 2
        self.y_name = y_design.design_info.describe()
        self.data_name = self.y_name
        self.X_names = X_design.design_info.describe().split(" + ")

        # Store response (flattened) and regressors as NumPy arrays
        self.y = np.array([y_design]).ravel()
        self.data = self.y
        self.data_length = len(self.data)
        self.X = np.array([X_design])[0]
        self.index = data.index
        self.initial_values = np.zeros(self.z_no)

        self._create_latent_variables()
开发者ID:RJT1990,项目名称:pyflux,代码行数:36,代码来源:egarchmreg.py

示例14: _chunk_boot_ols_coefs

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def _chunk_boot_ols_coefs(dat, formula, weights, seed):
    """
    Fit OLS on one bootstrap resample of ``dat`` and return the result as a list.

    Intended to be used in a parallelization context: ``seed`` is passed as
    the ``random_state`` of the resampling.
    """
    # Same number of rows as the input, drawn with replacement.
    resampled = dat.sample(frac=1, replace=True, random_state=seed)
    # Called directly from this function: ``eval_env`` is an integer depth.
    y, x = dmatrices(formula, resampled, eval_env=1, return_type="dataframe")
    coefs = _ols(
        x,
        y,
        robust=None,
        n_lags=1,
        cluster=None,
        all_stats=False,
        weights=weights,
    )
    return list(coefs)
开发者ID:ejolly,项目名称:pymer4,代码行数:13,代码来源:utils.py

示例15: _ols_group

# 需要导入模块: import patsy [as 别名]
# 或者: from patsy import dmatrices [as 别名]
def _ols_group(dat, formula, group_col, group, rank):
    """Fit OLS for ``formula`` on the rows of one group. Used by Lm2.

    Rows where ``dat[group_col] == group`` are selected and re-indexed; when
    ``rank`` is truthy the subset is replaced by its ``rank()`` before
    fitting. The result of ``_ols`` is returned as a list.
    """
    in_group = dat[group_col] == group
    subset = dat[in_group].reset_index(drop=True)
    if rank:
        subset = subset.rank()
    # Called directly from this function: ``eval_env`` is an integer depth.
    y, x = dmatrices(formula, subset, eval_env=1, return_type="dataframe")
    coefs = _ols(x, y, robust=None, n_lags=1, cluster=None, all_stats=False)
    return list(coefs)
开发者ID:ejolly,项目名称:pymer4,代码行数:10,代码来源:utils.py


注:本文中的patsy.dmatrices方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。