当前位置: 首页>>代码示例>>Python>>正文


Python api.logit函数代码示例

本文整理汇总了Python中statsmodels.formula.api.logit函数的典型用法代码示例。如果您正苦于以下问题：Python logit函数的具体用法？Python logit怎么用？Python logit使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了logit函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: reduce_multi_model

def reduce_multi_model(orig_fitted, base_string, res, df, fit=None):
    """Repeatedly refit a logit model on a shrinking set of variables.

    orig_fitted = fitted results object obtained by calling .fit() on a
                  statsmodels logit model
    base_string = right hand side of the formula used to estimate orig_fitted
    res = name of the column in df that holds the classes
    df = the pandas dataframe from which orig_fitted was estimated
    fit = optional optimizer name forwarded to .fit(method=...); when None
          the statsmodels default optimizer is used
    ==========
    Returns a tuple (fitted model, base string) for the smallest model that
    could be estimated. If no reduction is possible, or the first refit
    fails, the original model and base_string are returned unchanged.

    The candidate variables come from whittle_multi_model_vars and are passed
    through combat_multi_collinearity before each refit. The loop ends when
    whittle_multi_model_vars returns None or when any step of a refit raises.
    """
    # Validate the argument types before doing any work.
    assert isinstance(base_string, str)
    assert isinstance(res, str)
    assert isinstance(df, pd.DataFrame)

    # First attempt at finding a reduced variable set.
    candidate_vars = whittle_multi_model_vars(orig_fitted, base_string)
    # Best (smallest) model found so far and its base string.
    best_model, best_base = orig_fitted, base_string

    node_variables = isolate_node_cols(df)

    # Keyword arguments shared by every refit; the optimizer is only
    # specified when the caller asked for one.
    fit_kwargs = {"maxiter": 2000, "disp": False}
    if fit is not None:
        fit_kwargs["method"] = fit

    while candidate_vars is not None:
        model_vars = combat_multi_collinearity(df, candidate_vars,
                                               node_variables, max_cond=2000)
        rhs = " + ".join(model_vars)
        # "0 +" suppresses the intercept in the formula.
        formula = res + " ~ " + "0 + " + rhs

        try:
            refit = smf.logit(formula, data=df).fit(**fit_kwargs)
            # Record the reduced specification. Note the returned base string
            # is built from the candidate variables, not from model_vars.
            best_base = " + ".join(candidate_vars)
            best_model = refit
            # Look for a further reduction of the model just fitted.
            candidate_vars = whittle_multi_model_vars(refit, rhs)
        except Exception:
            # Any failure ends the search; without this the loop would
            # never terminate.
            candidate_vars = None

    return best_model, best_base
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:60,代码来源:hybrid_logit.py

示例2: RunLogisticModels

def RunLogisticModels(live):
    """Runs regressions that predict sex.

    Fits a simple and a more complex logit model for the derived binary
    column 'boy', prints a summary of each, prints the in-sample accuracy of
    the complex model next to the baseline rate, and prints the predicted
    probability for one hand-built row.

    live: DataFrame of pregnancy records; the columns prglngth, babysex,
          agepreg, datend, hpagelb, birthord and race are read here.
    """
    # Work on a copy of the filtered rows so the derived columns are added to
    # an independent frame rather than to a slice of the caller's data.
    df = live[live.prglngth>30].copy()

    df['boy'] = (df.babysex==1).astype(int)
    df['isyoung'] = (df.agepreg<20).astype(int)
    # "old" means above 35; the comparison was previously inverted.
    df['isold'] = (df.agepreg>35).astype(int)
    df['season'] = (((df.datend+1) % 12) / 3).astype(int)

    # run the simple model
    model = smf.logit('boy ~ agepreg', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # run the complex model
    model = smf.logit('boy ~ agepreg + hpagelb + birthord + C(race)', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # Rebuild the design matrices of the complex model as DataFrames; rows
    # dropped by the formula machinery are therefore excluded below.
    exog = pandas.DataFrame(model.exog, columns=model.exog_names)
    endog = pandas.DataFrame(model.endog, columns=[model.endog_names])

    # Values for an optional scatter plot of age against fitted probability:
    # fittedvalues are log-odds, converted to odds and then to probabilities.
    xs = exog['agepreg']
    lo = results.fittedvalues
    o = np.exp(lo)
    p = o / (o+1)

    # compute accuracy
    actual = endog['boy']
    baseline = actual.mean()

    predict = (results.predict() >= 0.5)
    true_pos = predict * actual
    true_neg = (1 - predict) * (1 - actual)

    acc = (sum(true_pos) + sum(true_neg)) / len(actual)
    print(acc, baseline)

    # Predict for a single new observation.
    columns = ['agepreg', 'hpagelb', 'birthord', 'race']
    new = pandas.DataFrame([[35, 39, 3, 1]], columns=columns)
    y = results.predict(new)
    print(y)
开发者ID:Bercio,项目名称:ThinkStats2,代码行数:56,代码来源:regression.py

示例3: calculate_odds_ratio

def calculate_odds_ratio(genotypes, phen_vector1, phen_vector2, reg_type, covariates, response='',
                         phen_vector3=''):  # diff - done
    """
    Runs the regression for a specific phenotype vector relative to the genotype data and covariates.

    :param genotypes: a DataFrame containing the genotype information. It is
        modified in place: the columns ``y``, ``MaxAgeAtCPT`` and (when
        ``phen_vector3`` is used) ``phe`` are written to it.
    :param phen_vector1: the phenotype vector, stored as column ``y``
    :param phen_vector2: values stored as column ``MaxAgeAtCPT`` (usable as a covariate)
    :param reg_type: accepted for interface compatibility; a logit model is
        fitted with the ``bfgs`` optimizer whatever its value
    :param covariates: a string containing all desired covariates
    :param response: optional name of the outcome column; when empty the
        outcome is ``genotype``
    :param phen_vector3: optional second phenotype vector, stored as column
        ``phe`` and added to the model only when it has at least one truthy
        element. The default ``''`` (or ``None``) means "not supplied".
    :type genotypes: pandas DataFrame
    :type phen_vector1: numpy array
    :type covariates: string
    :returns: ``(odds, p, od)`` where ``odds`` is the fitted coefficient of
        ``y``, ``p`` its p-value and ``od`` is
        ``[-log10(p), coefficient, '[lower,upper]']``. If the fit fails for
        any reason the result is ``(0, nan, [nan, nan, nan])``.

    .. note::
        The covariates must be a string that is delimited by '+', not a list.
        If you are using a list of covariates and would like to convert it to the pyPhewas format, use the following::

            l = ['genotype', 'age'] # a list of your covariates
            covariates = '+'.join(l) # pyPhewas format

        The covariates that are listed here *must* be headers to your genotype CSV file.
    """

    data = genotypes
    data['y'] = phen_vector1
    data['MaxAgeAtCPT'] = phen_vector2

    # The default for phen_vector3 is an empty string, which has no .any();
    # treat strings and None as "not supplied" instead of crashing.
    has_phe = (phen_vector3 is not None
               and not isinstance(phen_vector3, str)
               and bool(np.any(phen_vector3)))

    predictors = ['y']
    if has_phe:
        data['phe'] = phen_vector3
        predictors.append('phe')
    if response:
        # With an explicit response, genotype becomes a predictor.
        outcome = response
        predictors.append('genotype')
    else:
        outcome = 'genotype'
    if covariates:
        predictors.append(covariates)
    # Joining the terms guarantees a '+' between every pair; the previous
    # string concatenation fused 'genotype' with the first covariate.
    f = outcome + ' ~ ' + ' + '.join(predictors)

    try:
        fitted = smf.logit(f, data).fit(method='bfgs', disp=False)
        p = fitted.pvalues.y
        odds = fitted.params.y
        conf = fitted.conf_int()
        od = [-math.log10(p), odds, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])]
    except Exception:
        # Best effort: a model that cannot be estimated yields placeholders
        # so that a scan over many phenotypes can continue.
        odds = 0
        p = np.nan
        od = [np.nan, np.nan, np.nan]
    return (odds, p, od)
开发者ID:BennettLandman,项目名称:pyPheWAS,代码行数:54,代码来源:pyProWAS.py

示例4: logistic_regression

 def logistic_regression(self, use_glm=True):
     """
     Fit Direction on Lag1..Lag5 and Volume and tabulate the predictions.

     With use_glm=True a binomial GLM is fitted on self.df; otherwise a logit
     model is fitted on self.transformedDF. Either way the 0/1 predictions are
     passed to tp.output_table together with the observed values of
     self.transformedDF[self.y_col].

     Author's note (b): the only statistically significant predictor appears
     to be Lag2.
     """
     formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"
     if use_glm:
         result = smf.glm(formula, data=self.df, family=sm.families.Binomial()).fit()
         # Author's note: the fitted value here is the probability of the
         # class at index 0, hence the inverted mapping to 0/1.
         pred_values = result.fittedvalues.map(lambda x: 0 if x > 0.5 else 1)
     else:
         result = smf.logit(formula, data=self.transformedDF).fit()
         predictors = ["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]
         # Here the prediction is the probability of being 1.
         design = sm.add_constant(self.df[predictors])
         pred_values = Series(result.predict(design)).map(lambda x: 1 if x > 0.5 else 0)
     # Author's note (c): share of correct predictions is
     # (54+557)/(54+557+48+430) = 56.1%. In weeks the market goes up the model
     # is right most of the time, 557/(557+48) = 92.1%; in weeks it goes down
     # the model is right only 54/(430+54) = 11.2% of the time.
     tp.output_table(pred_values.values, self.transformedDF[self.y_col].values)
开发者ID:Aran00,项目名称:ISIRExerciseCode,代码行数:25,代码来源:Exec10.py

示例5: logistic_model

def logistic_model(data, explanatory_variables, response_variable, 
                   maxiter = 35, verbose = True):
    """Fit a logit model and optionally print its summary and odds ratios.

    data: DataFrame holding the response and explanatory columns
    explanatory_variables: iterable of formula terms, joined with ' + '
    response_variable: name of the response column
    maxiter: maximum number of optimizer iterations passed to .fit()
    verbose: when true and the fit succeeded, print the formula, the model
             summary and a table of exponentiated coefficients with their
             confidence bounds

    Returns the fitted results object, or None when the model could not be
    estimated (the error is printed rather than raised).
    """
    explanatory_vars = ' + '.join(explanatory_variables)
    formula = response_variable + ' ~ ' + explanatory_vars

    try:
        model = smf.logit(formula = formula, data = data).fit(maxiter = maxiter)
    except Exception as err:
        # Report and continue; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        print('Error "' + str(err) + '" while processing model', formula)
        model = None

    if verbose and model is not None:
        print()
        print('MODEL:', formula, '\n')
        print(model.summary())
        print()

        # odds ratios with 95% confidence intervals
        print ("Odds Ratios")
        params = model.params
        conf = model.conf_int()
        conf['OR'] = params
        conf.columns = ['Lower CI', 'Upper CI', 'Odds Ratios']
        # Exponentiating the log-odds coefficients and bounds gives odds ratios.
        print (numpy.exp(conf))

    return model
开发者ID:MColosso,项目名称:Forest-Fires,代码行数:26,代码来源:Forest+Fires+-+week+4.py

示例6: LogisticRegressionExample

def LogisticRegressionExample():
    """Walks through a tiny logistic regression by hand, then fits it.

    For a fixed coefficient vector the log-odds, odds, probabilities,
    per-observation likelihoods and their product are printed in that order;
    afterwards the same four observations are fitted with smf.logit and the
    summary is printed.
    """
    outcome = np.array([0, 1, 0, 1])
    first = np.array([0, 0, 0, 1])
    second = np.array([0, 1, 1, 1])

    intercept, slope1, slope2 = [-1.5, 2.8, 1.1]

    # Hand computation for the assumed coefficients.
    log_odds = intercept + slope1 * first + slope2 * second
    odds = np.exp(log_odds)
    prob = odds / (odds+1)
    # Probability that the model assigns to what was actually observed.
    likelihood = outcome * prob + (1-outcome) * (1-prob)

    for value in (log_odds, odds, prob, likelihood, np.prod(likelihood)):
        print(value)

    # Let statsmodels estimate the coefficients from the same data.
    frame = pandas.DataFrame({'y': outcome, 'x1': first, 'x2': second})
    fitted = smf.logit('y ~ x1 + x2', data=frame).fit()
    print(fitted.summary())
开发者ID:13tsuyoshi,项目名称:ThinkStats2,代码行数:25,代码来源:regression.py

示例7: _corr

    def _corr(self, sel, suffix):
        """Relate model accuracy to human accuracy and plot the result.

        Fits a logit model of model_accuracy on human_accuracy using ``sel``,
        draws a logistic lmplot with jitter, overlays the per-bin mean model
        accuracy (marker size proportional to the bin count) and hands the
        figure to ``self.show`` with the regression summary in the caption.

        sel: DataFrame with the columns 'human_accuracy' and 'model_accuracy'
        suffix: text used in the output name and at the start of the caption
        """
        formula = str('model_accuracy ~ human_accuracy')
        logreg = smf.logit(formula=formula, data=sel).fit()
        summ = logreg.summary()
        if self.html is None:
            # Convert to plain text: the caption below is built with
            # ``suffix + summ`` and a Summary object cannot be added to a str.
            summ = summ.as_text()
            print(summ)
        else:
            summ = summ.as_html().replace('class="simpletable"',
                                          'class="simpletable table"')

        # Column names with spaces give nicer axis labels.
        sel = sel.rename(columns={'human_accuracy': 'human accuracy',
                                  'model_accuracy': 'model accuracy'})

        sns.lmplot('human accuracy', 'model accuracy', data=sel, x_jitter=.01,
                    y_jitter=.05, logistic=True, truncate=True)

        # Bin human accuracy on edges 0.05, 0.15, ..., 0.95 so that bin k is
        # plotted at x = 0.1 * k.
        bins = np.digitize(sel['human accuracy'], np.arange(.05,1,.1))
        count = sel['model accuracy'].groupby(bins).count()
        mean = sel['model accuracy'].groupby(bins).mean()
        sns.plt.scatter(.1*mean.index, mean, s=10*count, c='.15',
                        linewidths=0, alpha=.8)
        sns.plt.title(models.NICE_NAMES[self.model_name])
        sns.plt.xlim([-.1, 1.1])
        sns.plt.ylim([-.1, 1.1])
        self.show(pref='corr_sil', suffix=self.model_name + '_' + suffix,
                  caption=suffix + summ)
开发者ID:mageed,项目名称:conv-exp,代码行数:27,代码来源:run.py

示例8: run_logits

def run_logits(grouped, formula, var):
    """Fit one logit model per group and print a tab-separated result line.

    grouped: iterable of (code, group) pairs, e.g. a pandas groupby
    formula: model formula applied to every group
    var: variable name forwarded to extract_res

    Each printed line holds the padded country name, the number of
    observations, the parameter formatted with '%0.3g', the significance
    stars and an arrow marker when the parameter is starred and positive.
    """
    for code, group in grouped:
        country = get_country(code).ljust(14)
        results = smf.logit(formula, data=group).fit(disp=False)
        nobs, param, stars = extract_res(results, var=var)
        # Flag results that are both starred and positive.
        if stars and param > 0:
            arrow = '<--'
        else:
            arrow = ''
        fields = (country, nobs, '%0.3g'%param, stars, arrow)
        print(*fields, sep='\t')
开发者ID:Libardo1,项目名称:ProbablyOverthinkingIt,代码行数:8,代码来源:ess.py

示例9: log_reg

def log_reg(formula, df):
    """Fit a logit model for ``formula`` on ``df`` and print its summary.

    Any exception raised while fitting or summarizing is reported as a
    "bad formula" banner instead of being propagated. Nothing is returned.

    The print calls use the single-argument parenthesized form, which behaves
    the same under Python 2 and Python 3.
    """
    try:
        model1 = smf.logit(formula = formula, data=df).fit()
        print(model1.summary())
    except Exception:
        banner = "+" * 40
        print(banner)
        print("bad formula")
        print(banner)
开发者ID:AlexanderMwangi,项目名称:DAT3-students,代码行数:8,代码来源:logit_reg_Patient2.py

示例10: fit_model

def fit_model(formula, model_file):
    """
    Fits a logit model on the data returned by load_data() and saves it
    :param formula: formula for the model
    :param model_file: name of file to save the fitted model to
    """
    training_data = load_data()
    # Fit and persist in one chain; the fitted results object is what is saved.
    logit(formula=formula, data=training_data).fit().save(model_file)
开发者ID:NathanDeMaria,项目名称:Football,代码行数:10,代码来源:save_model.py

示例11: logistic_regression_test

def logistic_regression_test():
  """Check that a logit fit on the generated data reaches an AUC above 0.8.

  Reads ./generated_logistic_data.csv, fits y on variable_a, variable_b and
  variable_c, prints the model summary and the in-sample AUC, and asserts
  that the AUC exceeds 0.8.
  """
  # read_csv with index_col=0 and parse_dates=True is the documented
  # replacement for the removed DataFrame.from_csv.
  df = pandas.read_csv('./generated_logistic_data.csv', index_col=0, parse_dates=True)

  generated_model = smf.logit('y ~ variable_a + variable_b + variable_c', df)
  generated_fit = generated_model.fit()
  # roc_curve returns (false positive rates, true positive rates, thresholds).
  fpr, tpr, _ = sklearn.metrics.roc_curve(df['y'], generated_fit.predict(df))
  auc = sklearn.metrics.auc(fpr, tpr)
  print(generated_fit.summary())
  print("AUC score: {0}".format(auc))
  assert auc > .8, 'AUC should be significantly above random'
开发者ID:zbxzc35,项目名称:Machine-Learning-Test-by-Test,代码行数:10,代码来源:logistic_regression_tests.py

示例12: generate_model

def generate_model(df):
    '''
    Fit a logistic regression of discrete_rate on fico and interest.
    :param df: a dataframe with the independent columns fico and interest
    and the dependent column discrete_rate
    :return: the fitted logistic model
    '''
    return smf.logit(formula='discrete_rate  ~ fico + interest', data=df).fit()
开发者ID:AkiraKane,项目名称:thinkful,代码行数:11,代码来源:U2L4P3_logistic_regression_analysis.py

示例13: logRegR

    def logRegR(self, event):
        """Ask for a formula, fit a logit model and show the results.

        Opens a text-entry dialog; if it is confirmed, the entered formula is
        fitted against self.parent.data.data, the summary is written through
        self.parent.write, and predicted values are plotted against the
        observed response. The dialog is destroyed even if fitting or
        plotting raises.

        event: the triggering event (unused)
        """
        # would have to mess with Patsy formula parser to get more powerful...
        # too much work
        dlg = wx.TextEntryDialog(self.parent, "Enter the logistic regression formula")
        try:
            if dlg.ShowModal() == wx.ID_OK:
                model = smf.logit(formula=dlg.GetValue(), data=self.parent.data.data)
                results = model.fit()
                self.parent.write("\n" + str(results.summary()) + "\n")
                sns.regplot(results.predict(), model.endog, ci=False, y_jitter=0.2)
                plt.show()
        finally:
            # Release the dialog on every path, including a bad formula.
            dlg.Destroy()
开发者ID:alanhdu,项目名称:Dex,代码行数:12,代码来源:Stats.py

示例14: fit_model

def fit_model(y, formula, df):
    """Fit ``formula`` on ``df`` with a model chosen from the response column.

    When df[y] has exactly two distinct values a logistic regression is
    fitted; otherwise an ordinary least squares regression is fitted.
    Returns the fitted results object.
    """
    from statsmodels.formula.api import ols, logit

    # A dichotomous response calls for logit, anything else for OLS.
    estimator = logit if df[y].nunique() == 2 else ols
    return estimator(formula, df).fit()
开发者ID:KirstieJane,项目名称:DESCRIBING_DATA,代码行数:13,代码来源:create_correlation_matrix.py

示例15: check_initial_specification

def check_initial_specification(dataframe, result_string, new_var, min_specification, fit_word=None):
    """Fit the minimum specification plus new_var; return the best new p-value.

    dataframe = pandas dataframe holding all referenced columns
    result_string = name of the response column
    new_var = list of variable names to add to the minimum specification
    min_specification = right hand side of the base formula, terms separated
                        by " + "
    fit_word = optional optimizer name forwarded to .fit(method=...)
    ==========
    Returns the smallest p-value among the variables in new_var.

    Raises an Exception when check_full_rank rejects the combined variables
    or when the optimizer reports that it did not converge.
    """
    # Validate the argument types.
    assert isinstance(dataframe, pd.DataFrame)
    assert isinstance(result_string, str)
    assert isinstance(new_var, list)
    assert isinstance(min_specification, str)

    # Variables of the minimum specification, without the "0" term if present.
    base_vars = min_specification.split(" + ")
    try:
        base_vars.remove("0")
    except ValueError:
        pass

    all_vars = base_vars + new_var

    # Random starting values, one per variable.
    start_vals = np.random.rand(len(all_vars))

    # Formula for the logistic regression.
    fString = result_string + " ~ " + min_specification + " + " + " + ".join(new_var)

    # Refuse to fit when the variables are not of full rank.
    if not check_full_rank(dataframe, all_vars):
        raise Exception("The base model plus {} is not of full rank.".format(new_var))

    # Only name an optimizer when the caller supplied one.
    fit_kwargs = {"start_params": start_vals, "maxiter": 2000, "disp": False}
    if fit_word is not None:
        fit_kwargs["method"] = fit_word
    model = smf.logit(fString, data=dataframe).fit(**fit_kwargs)

    if not model.mle_retvals["converged"]:
        raise Exception("The model for {} did not converge".format(new_var))

    # Smallest p-value among the newly added variables.
    return min(model.pvalues[name] for name in new_var)
开发者ID:timothyb0912,项目名称:cs289_final_project,代码行数:39,代码来源:hybrid_logit.py


注:本文中的statsmodels.formula.api.logit函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。