本文整理汇总了Python中statsmodels.formula.api.logit函数的典型用法代码示例。如果您正苦于以下问题：Python logit函数的具体用法？Python logit怎么用？Python logit使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了logit函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: reduce_multi_model
def reduce_multi_model(orig_fitted, base_string, res, df, fit=None):
    """Repeatedly refit a logit model on shrinking sets of variables.

    orig_fitted = an object returned from calling .fit() on a statsmodels
                  logit model
    base_string = the right hand side of the formula used to estimate
                  orig_fitted
    res = name of the column in df that holds the classes
    df = the pandas dataframe from which orig_fitted was estimated
    fit = optional optimizer name forwarded as .fit(method=...); when None
          the method argument is not passed at all
    ==========
    Returns a tuple (model, base_string): the last model that was fitted
    successfully and the " + "-joined variable set that led to it. When no
    reduced model could be fitted, orig_fitted and base_string are returned
    unchanged.

    The search is driven by whittle_multi_model_vars: the loop continues
    for as long as it returns something other than None, and stops as soon
    as any step of a refit raises.
    """
    # Check the class of the function inputs.
    assert isinstance(base_string, str)
    assert isinstance(res, str)
    assert isinstance(df, pd.DataFrame)

    # First attempt at a reduced variable set, taken from the original model.
    candidate_vars = whittle_multi_model_vars(orig_fitted, base_string)

    # Best (smallest) model found so far and the base string that goes with it.
    best_model, best_base = orig_fitted, base_string
    node_variables = isolate_node_cols(df)

    # Only hand an optimizer to statsmodels when the caller asked for one.
    fit_options = {"maxiter": 2000, "disp": False}
    if fit is not None:
        fit_options["method"] = fit

    while candidate_vars is not None:
        kept_vars = combat_multi_collinearity(
            df, candidate_vars, node_variables, max_cond=2000
        )
        rhs = " + ".join(kept_vars)
        # "0 + " is prepended to every refit formula.
        formula = res + " ~ " + "0 + " + rhs
        try:
            refit = smf.logit(formula, data=df).fit(**fit_options)
            # Record the successful reduction before searching any further.
            best_base = " + ".join(candidate_vars)
            best_model = refit
            candidate_vars = whittle_multi_model_vars(refit, rhs)
        except Exception:
            # A failed step ends the search; without this the loop would
            # retry the same variable set forever.
            candidate_vars = None

    return best_model, best_base
示例2: RunLogisticModels
def RunLogisticModels(live):
    """Runs regressions that predict sex.

    Fits a simple logit model (boy ~ agepreg) and a larger one that adds
    hpagelb, birthord and C(race). For each model it prints the number of
    observations and the result type, then passes the results to
    SummarizeResults. For the larger model it also prints the
    classification accuracy next to the baseline rate of boys, and the
    predicted value for one hand-built record.

    live: DataFrame of pregnancy records
    """
    # Take an explicit copy: the derived columns below are added to the
    # filtered frame, and assigning onto a bare slice makes pandas emit
    # SettingWithCopy warnings.
    df = live[live.prglngth > 30].copy()

    df['boy'] = (df.babysex == 1).astype(int)
    df['isyoung'] = (df.agepreg < 20).astype(int)
    # The original compared with "< 35", which flags every record below 35
    # (including all the "isyoung" ones) as old; the comparison was inverted.
    df['isold'] = (df.agepreg > 35).astype(int)
    df['season'] = (((df.datend + 1) % 12) / 3).astype(int)

    # run the simple model
    model = smf.logit('boy ~ agepreg', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # run the complex model
    model = smf.logit('boy ~ agepreg + hpagelb + birthord + C(race)', data=df)
    results = model.fit()
    print('nobs', results.nobs)
    print(type(results))
    SummarizeResults(results)

    # Rebuild the design matrices of the complex model as DataFrames.
    exog = pandas.DataFrame(model.exog, columns=model.exog_names)
    endog = pandas.DataFrame(model.endog, columns=[model.endog_names])

    # Inputs for the (currently disabled) scatter plot of age against the
    # fitted probability: fittedvalues are log-odds, converted to odds and
    # then to probabilities.
    xs = exog['agepreg']
    lo = results.fittedvalues
    o = np.exp(lo)
    p = o / (o + 1)
    #thinkplot.Scatter(xs, p, alpha=0.1)
    #thinkplot.Show()

    # compute accuracy
    actual = endog['boy']
    baseline = actual.mean()
    predict = (results.predict() >= 0.5)
    true_pos = predict * actual
    true_neg = (1 - predict) * (1 - actual)
    acc = (sum(true_pos) + sum(true_neg)) / len(actual)
    print(acc, baseline)

    # Prediction for a single hypothetical record.
    columns = ['agepreg', 'hpagelb', 'birthord', 'race']
    new = pandas.DataFrame([[35, 39, 3, 1]], columns=columns)
    y = results.predict(new)
    print(y)
示例3: calculate_odds_ratio
def calculate_odds_ratio(genotypes, phen_vector1, phen_vector2, reg_type, covariates, response='',
                         phen_vector3=''):
    """
    Runs the regression for a specific phenotype vector relative to the genotype data and covariates.

    :param genotypes: a DataFrame containing the genotype information; the
        columns ``y``, ``MaxAgeAtCPT`` and (when supplied) ``phe`` are written
        into it in place
    :param phen_vector1: phenotype vector stored as column ``y``
    :param phen_vector2: vector stored as column ``MaxAgeAtCPT``
    :param reg_type: regression selector; every value currently fits the same
        logit model (see the note in the body)
    :param covariates: a string containing all desired covariates
    :param response: optional name of the response column; when empty,
        ``genotype`` is the response
    :param phen_vector3: optional extra vector stored as column ``phe`` and
        added to the formula; it is ignored when left at its default, when it
        is ``None`` or a string, or when it has no truthy element
    :type genotypes: pandas DataFrame
    :type covariates: string
    :returns: ``(odds, p, od)`` where ``odds`` is the fitted parameter for
        ``y``, ``p`` its p-value and ``od`` is
        ``[-log10(p), odds, '[lower,upper]']``. If fitting fails for any
        reason the result is ``(0, nan, [nan, nan, nan])``.

    .. note::
        The covariates must be a string that is delimited by '+', not a list.
        If you are using a list of covariates and would like to convert it to the pyPhewas format, use the following::

            l = ['genotype', 'age'] # a list of your covariates
            covariates = '+'.join(l) # pyPhewas format

        The covariates that are listed here *must* be headers to your genotype CSV file.
    """
    data = genotypes
    data['y'] = phen_vector1
    data['MaxAgeAtCPT'] = phen_vector2

    # The default '' (and None) has no .any(); calling it used to raise
    # AttributeError whenever phen_vector3 was omitted.
    has_phe = (
        phen_vector3 is not None
        and not isinstance(phen_vector3, str)
        and bool(np.any(phen_vector3))
    )
    if has_phe:
        data['phe'] = phen_vector3

    if response:
        if has_phe:
            # The original omitted the '+' before the covariates here, gluing
            # the first covariate name onto 'genotype'.
            f = response + '~ y + phe + genotype +' + covariates
        else:
            f = response + '~ y + genotype +' + covariates
    else:
        if has_phe:
            f = 'genotype ~ y + phe +' + covariates
        else:
            f = 'genotype ~ y +' + covariates

    try:
        # NOTE(review): the original had separate branches for reg_type == 0
        # and everything else, but both ran exactly this logit fit, so they
        # are merged. Confirm whether a non-logistic model was intended for
        # reg_type != 0.
        fitted = smf.logit(f, data).fit(method='bfgs', disp=False)
        p = fitted.pvalues.y
        odds = fitted.params.y
        conf = fitted.conf_int()
        od = [-math.log10(p), odds, '[%s,%s]' % (conf[0]['y'], conf[1]['y'])]
    except Exception:
        # Best effort: a failed fit yields placeholder values instead of
        # aborting the caller. KeyboardInterrupt/SystemExit are no longer
        # swallowed.
        odds = 0
        p = np.nan
        od = [np.nan, np.nan, np.nan]
    return (odds, p, od)
示例4: logistic_regression
def logistic_regression(self, use_glm=True):
    """
    Fit Direction on Lag1..Lag5 and Volume, then tabulate predictions.

    With use_glm=True a binomial GLM is fitted on self.df; otherwise a logit
    model is fitted on self.transformedDF. In both cases the 0/1 predictions
    are handed to tp.output_table together with the values of
    self.transformedDF[self.y_col].

    Author's notes:
    (b) it seems the statistical significant predict variable is only Lag2. How disappointing...
    (c) Percentage of currect predictions: (54+557)/(54+557+48+430) = 56.1%.
    Weeks the market goes up the logistic regression is right most of the time, 557/(557+48) = 92.1%.
    Weeks the market goes up the logistic regression is wrong most of the time 54/(430+54) = 11.2%.
    """
    formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"

    if use_glm:
        model = smf.glm(formula, data=self.df, family=sm.families.Binomial())
    else:
        model = smf.logit(formula, data=self.transformedDF)
    result = model.fit()

    if use_glm:
        probs = result.fittedvalues

        # Author's note: beware the prob here is the index 0's prob, so the
        # mapping is inverted relative to the logit branch.
        def to_label(prob):
            return 0 if prob > 0.5 else 1
    else:
        # Author's note: this is the probability of being 1.
        predictors = self.df[["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]]
        probs = Series(result.predict(sm.add_constant(predictors)))

        def to_label(prob):
            return 1 if prob > 0.5 else 0

    pred_values = probs.map(to_label)
    observed = self.transformedDF[self.y_col].values
    tp.output_table(pred_values.values, observed)
示例5: logistic_model
def logistic_model(data, explanatory_variables, response_variable,
                   maxiter = 35, verbose = True):
    """Fit a logit model from variable names and optionally report it.

    data: data set handed to smf.logit as ``data``
    explanatory_variables: iterable of term strings, joined with ' + ' to
        form the right-hand side of the formula
    response_variable: name of the response, used as the left-hand side
    maxiter: iteration limit forwarded to .fit()
    verbose: when true and the fit succeeded, print the formula, the model
        summary and a table of exponentiated coefficients with their
        confidence bounds

    Returns the fitted results object, or None when building or fitting the
    model raised; in that case an error line naming the formula is printed.
    """
    formula = response_variable + ' ~ ' + ' + '.join(explanatory_variables)

    try:
        model = smf.logit(formula=formula, data=data).fit(maxiter=maxiter)
    except Exception as exc:
        # Deliberately best-effort, but no longer a bare "except:", so
        # KeyboardInterrupt and SystemExit propagate.
        print('Error "' + str(exc) + '" while processing model', formula)
        return None

    if verbose and model is not None:
        print()
        print('MODEL:', formula, '\n')
        print(model.summary())
        print()

        # odds ratios with 95% confidence intervals
        print("Odds Ratios")
        conf = model.conf_int()
        conf['OR'] = model.params
        conf.columns = ['Lower CI', 'Upper CI', 'Odds Ratios']
        # Exponentiate coefficients and bounds to move from log-odds to odds.
        print(numpy.exp(conf))

    return model
示例6: LogisticRegressionExample
def LogisticRegressionExample():
    """Runs a simple example of logistic regression and prints results.

    Works a four-row example by hand for fixed coefficients, printing the
    log-odds, odds, probabilities, per-row likelihoods and their product,
    then fits the same data with smf.logit and prints the summary.
    """
    y = np.array([0, 1, 0, 1])
    x1 = np.array([0, 0, 0, 1])
    x2 = np.array([0, 1, 1, 1])

    # Hand-picked coefficients: intercept, then one slope per predictor.
    intercept, slope_x1, slope_x2 = [-1.5, 2.8, 1.1]

    log_odds = intercept + slope_x1 * x1 + slope_x2 * x2
    print(log_odds)

    odds = np.exp(log_odds)
    print(odds)

    prob = odds / (odds + 1)
    print(prob)

    # Likelihood of each row: prob where y is 1, (1 - prob) where y is 0.
    likelihood = y * prob + (1 - y) * (1 - prob)
    print(likelihood)
    print(np.prod(likelihood))

    frame = pandas.DataFrame({'y': y, 'x1': x1, 'x2': x2})
    fitted = smf.logit('y ~ x1 + x2', data=frame).fit()
    print(fitted.summary())
示例7: _corr
def _corr(self, sel, suffix):
    """Fit model accuracy against human accuracy and plot the relation.

    sel: data set with 'human_accuracy' and 'model_accuracy' columns
    suffix: string appended to the output name and prepended to the caption

    A logit model 'model_accuracy ~ human_accuracy' is fitted. Its summary
    is printed when self.html is None, otherwise rendered as HTML with an
    extra "table" CSS class. A logistic lmplot is then drawn, overlaid with
    the binned means of model accuracy (marker size scaled by bin count),
    and the figure is passed to self.show with the summary in the caption.
    """
    formula = str('model_accuracy ~ human_accuracy')
    logreg = smf.logit(formula=formula, data=sel).fit()
    summ = logreg.summary()
    if self.html is None:
        print(summ)
        # The caption below is built with "suffix + summ"; a Summary object
        # cannot be added to a str, so use its text rendering.
        summ = str(summ)
    else:
        summ = summ.as_html().replace('class="simpletable"',
                                      'class="simpletable table"')

    # Column names with spaces are used as the axis labels of the plot.
    sel = sel.rename(columns={'human_accuracy': 'human accuracy',
                              'model_accuracy': 'model accuracy'})

    sns.lmplot('human accuracy', 'model accuracy', data=sel, x_jitter=.01,
               y_jitter=.05, logistic=True, truncate=True)

    # Bin human accuracy with edges at .05, .15, ..., .95.
    bins = np.digitize(sel['human accuracy'], np.arange(.05, 1, .1))
    #bins[bins==11] = 10
    count = sel['model accuracy'].groupby(bins).count()
    mean = sel['model accuracy'].groupby(bins).mean()

    # NOTE(review): sns.plt assumes the installed seaborn still exposes
    # pyplot under that name - confirm against the pinned version.
    sns.plt.scatter(.1*mean.index, mean, s=10*count, c='.15',
                    linewidths=0, alpha=.8)
    sns.plt.title(models.NICE_NAMES[self.model_name])
    sns.plt.xlim([-.1, 1.1])
    sns.plt.ylim([-.1, 1.1])
    self.show(pref='corr_sil', suffix=self.model_name + '_' + suffix,
              caption=suffix + summ)
示例8: run_logits
def run_logits(grouped, formula, var):
    """Fit one logit model per group and print a tab-separated line for each.

    grouped: iterable of (code, group) pairs
    formula: formula string passed to smf.logit for every group
    var: forwarded to extract_res as ``var``

    Each printed line holds the country label (padded to 14 characters),
    the values returned by extract_res, and a '<--' marker when stars is
    truthy and the parameter is positive.
    """
    for code, group in grouped:
        label = get_country(code).ljust(14)
        fitted = smf.logit(formula, data=group).fit(disp=False)
        nobs, param, stars = extract_res(fitted, var=var)

        if stars and param > 0:
            marker = '<--'
        else:
            marker = ''

        print(label, nobs, '%0.3g' % param, stars, marker, sep='\t')
示例9: log_reg
def log_reg(formula, df):
    """Fit a logit model for ``formula`` on ``df`` and print its summary.

    Any exception raised while building, fitting or summarising the model
    is reported as a "bad formula" banner instead of propagating. Nothing
    is returned.
    """
    try:
        model1 = smf.logit(formula=formula, data=df).fit()
        # print() with a single argument behaves the same on Python 2 and 3;
        # the original print statements were a SyntaxError on Python 3.
        print(model1.summary())
    except Exception:
        banner = "+" * 40
        print(banner)
        print("bad formula")
        print(banner)
示例10: fit_model
def fit_model(formula, model_file):
    """
    Fits a logit model on the data from load_data() and saves the result

    :param formula: formula for the model
    :param model_file: name of file to save the model to
    """
    dataset = load_data()
    logit(formula=formula, data=dataset).fit().save(model_file)
示例11: logistic_regression_test
def logistic_regression_test():
    """Fit a logit model on the generated CSV data and check its AUC.

    Reads ./generated_logistic_data.csv, fits
    'y ~ variable_a + variable_b + variable_c', prints the model summary
    and the AUC of the in-sample predictions, and asserts the AUC is
    above 0.8.
    """
    # DataFrame.from_csv was removed from pandas; read_csv with these two
    # arguments reproduces its defaults (first column as index, dates parsed).
    df = pandas.read_csv('./generated_logistic_data.csv', index_col=0, parse_dates=True)

    generated_model = smf.logit('y ~ variable_a + variable_b + variable_c', df)
    generated_fit = generated_model.fit()

    # roc_curve returns the false-positive rates first, then the
    # true-positive rates; those are the x and y of the AUC.
    roc_data = sklearn.metrics.roc_curve(df['y'], generated_fit.predict(df))
    auc = sklearn.metrics.auc(roc_data[0], roc_data[1])

    # print() calls replace the Python 2 print statements.
    print(generated_fit.summary())
    print("AUC score: {0}".format(auc))
    assert auc > .8, 'AUC should be significantly above random'
示例12: generate_model
def generate_model(df):
    '''
    Fit the logit model 'discrete_rate ~ fico + interest' on the given data.

    :param df: a dataframe with columns for the independent vars fico and
               interest and the dependent var discrete_rate
    :return: the fitted logistic model
    '''
    return smf.logit(formula='discrete_rate ~ fico + interest', data=df).fit()
示例13: logRegR
def logRegR(self, event):
    """Prompt for a formula, fit a logit model on it and show the results.

    When the dialog is confirmed, the entered text is used as the formula
    for smf.logit on self.parent.data.data. The model summary is written
    through self.parent.write and the predictions are plotted against the
    observed values. ``event`` is not used.
    """
    # would have to mess with Patsy formula parser to get more powerful...
    # too much work
    dlg = wx.TextEntryDialog(self.parent, "Enter the linear regression formula")
    try:
        if dlg.ShowModal() == wx.ID_OK:
            model = smf.logit(formula=dlg.GetValue(), data=self.parent.data.data)
            results = model.fit()
            self.parent.write("\n" + str(results.summary()) + "\n")
            sns.regplot(results.predict(), model.endog, ci=False, y_jitter=0.2)
            plt.show()
    finally:
        # A mistyped formula makes the fit raise; destroy the dialog in
        # that case too instead of leaking it.
        dlg.Destroy()
示例14: fit_model
def fit_model(y, formula, df):
    """Fit ``formula`` on ``df``, picking the estimator from column ``y``.

    A column with exactly two distinct values is treated as dichotomous and
    modelled with a logistic regression; anything else gets an ordinary
    least squares regression. Returns the fitted results.
    """
    from statsmodels.formula.api import ols, logit

    is_dichotomous = df[y].nunique() == 2
    estimator = logit if is_dichotomous else ols
    return estimator(formula, df).fit()
示例15: check_initial_specification
def check_initial_specification(dataframe, result_string, new_var, min_specification, fit_word=None):
    """Fit the minimum specification plus new_var; return the best new p-value.

    dataframe = pandas DataFrame holding the data
    result_string = name of the response, used as the left side of the formula
    new_var = list of variable names appended to the minimum specification
    min_specification = " + "-separated right-hand side of the base model
    fit_word = optional optimizer name forwarded as .fit(method=...)
    ==========
    Returns the smallest p-value among the variables in new_var.
    Raises Exception when check_full_rank rejects the combined variable set
    or when the fitted model reports that it did not converge.
    """
    # Type checks on the inputs.
    assert isinstance(dataframe, pd.DataFrame)
    assert isinstance(result_string, str)
    assert isinstance(new_var, list)
    assert isinstance(min_specification, str)

    # Variables of the minimum specification, without the first "0" term.
    base_vars = min_specification.split(" + ")
    try:
        base_vars.remove("0")
    except ValueError:
        pass

    # Random starting values, one per variable.
    start_vals = np.random.rand(len(base_vars) + len(new_var))

    # Formula string for the logistic regression.
    fString = " + ".join([result_string + " ~ " + min_specification] + new_var)

    # The design matrix has to be invertible.
    if not check_full_rank(dataframe, base_vars + new_var):
        raise Exception("The base model plus {} is not of full rank.".format(new_var))

    # Only pass an optimizer when the caller named one.
    fit_options = {"start_params": start_vals, "maxiter": 2000, "disp": False}
    if fit_word is not None:
        fit_options["method"] = fit_word
    model = smf.logit(fString, data=dataframe).fit(**fit_options)

    if not model.mle_retvals["converged"]:
        raise Exception("The model for {} did not converge".format(new_var))

    # Running minimum over the p-values of the added variables.
    lowest_pval = model.pvalues[new_var[0]]
    for name in new_var[1:]:
        candidate = model.pvalues[name]
        if candidate < lowest_pval:
            lowest_pval = candidate
    return lowest_pval