

Python api.glm Function Code Examples

This article collects typical usage examples of the statsmodels.formula.api.glm function in Python. If you are wondering exactly how the Python glm function is used, how to call it, or what real-world examples look like, the hand-picked code samples below should help.


Fifteen code examples of the glm function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
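
Before looking at the examples, here is a minimal, self-contained sketch of the basic call pattern (the data and column names are purely illustrative and come from none of the projects below). You pass a patsy formula and a DataFrame, plus an optional family object from statsmodels.api.families; when the family is omitted, glm defaults to Gaussian.

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# toy data, purely illustrative
df = pd.DataFrame({"y": [0, 0, 1, 1, 1, 0, 1, 1],
                   "x": [1.0, 2.0, 1.5, 3.0, 2.5, 0.5, 3.5, 2.8]})

# the response goes on the left of "~", the predictors on the right;
# the family argument selects the error distribution and link function
model = smf.glm("y ~ x", data=df, family=sm.families.Binomial())
result = model.fit()
print(result.summary())   # coefficient table with standard errors and p-values
print(result.aic)         # information criterion, used in several examples below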

Example 1: regression

    def regression(self):

        print self.people.head(n=1)
        self.people.rename(columns={'class': 'dbpedia_class'}, inplace=True)  # all_bios is the dataframe with the consolidated data; 'class' is a Python keyword, so patsy formulas cannot reference a column with that name directly


        self.logfile.write( "\n\n Sum Temp Interest NegBinom")
        m = glm("timeInterest ~ C(gender,Treatment(reference='male')) ", data=self.people, family=families.NegativeBinomial()).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())

        self.logfile.write( "\n\n Sum Temp Interest OLS")
        m = ols("timeInterest ~ C(gender,Treatment(reference='male')) ", data=self.people).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())


        self.logfile.write( "\n\n Pos Temp Interest NegBinom")
        m = glm("timePosInterest ~ C(gender,Treatment(reference='male')) ", data=self.people, family=families.NegativeBinomial()).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())

        #lim_people = self.people[self.people.timePosInterest>0]
        self.logfile.write( "\n\n Pos Temp Interest OLS")
        m = ols("timePosInterest ~ C(gender,Treatment(reference='male')) ", data=self.people).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())
Author: clauwag, Project: WikipediaGenderInequality, Lines of code: 33, Source file: GoogleTrendAnalyzerJSON.py
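
The notable piece of Example 1 is the patsy term C(gender, Treatment(reference='male')), which treatment-codes a categorical predictor with 'male' as the baseline level, so the reported coefficient is the contrast for the other level. Below is a hedged, self-contained sketch of the same pattern on made-up count data (all names and values are illustrative, not from the project above).

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import glm

df = pd.DataFrame({"timeInterest": [3, 0, 7, 2, 5, 1, 9, 4],
                   "gender": ["male", "female", "male", "female",
                              "male", "female", "male", "female"]})

# NegativeBinomial is a common family for over-dispersed counts;
# AIC/BIC can then be compared against an OLS fit, as Example 1 does
m = glm("timeInterest ~ C(gender, Treatment(reference='male'))",
        data=df, family=sm.families.NegativeBinomial()).fit()
print(m.aic, m.bic)
print(m.summary())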

Example 2: regression

    def regression(self):

        print self.people.head(n=1)
        self.people.rename(columns={'class': 'dbpedia_class'}, inplace=True)  # all_bios is the dataframe with the consolidated data; 'class' is a Python keyword, so patsy formulas cannot reference a column with that name directly

        self.logfile.write( "\n\n Num Regions NegativeBinomial")
        m = glm("numRegions ~ C(gender,Treatment(reference='male')) ", # + C(dbpedia_class,Treatment(reference='http://dbpedia.org/ontology/Person')) + birth_century
                data=self.people, family=families.NegativeBinomial()).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())


        #lim_people = self.people[self.people.numRegions>0]
        self.logfile.write( "\n\n Num Regions OLS")
        m = ols("numRegions ~ C(gender,Treatment(reference='male')) ", # + C(dbpedia_class,Treatment(reference='http://dbpedia.org/ontology/Person')) + birth_century
                data=self.people).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())



        # we could use beta regression for normalized entropy
        #print "\n\n Region Entropy"
        #m = ols("entropy ~ C(gender,Treatment(reference='male')) ", #+ C(dbpedia_class,Treatment(reference='http://dbpedia.org/ontology/Person')) + birth_century
        #        data=self.people).fit()
        #print m.summary() # <-- this gives you the table of coefficients with p-values, confidence intervals, and so on



        self.logfile.write( "\n\n Sum Temp Interest")
        m = ols("timeInterest ~ C(gender,Treatment(reference='male')) ", data=self.people).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())


        self.logfile.write( "\n\n Pos Temp Interest")
        m = glm("timePosInterest ~ C(gender,Treatment(reference='male')) ", data=self.people, family=families.NegativeBinomial()).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())

        #lim_people = self.people[self.people.timePosInterest>0]
        self.logfile.write( "\n\n Pos Temp Interest OLS")
        m = ols("timePosInterest ~ C(gender,Treatment(reference='male')) ", data=self.people).fit()
        self.logfile.write( "\n AIC "+str(m.aic))
        self.logfile.write( "\n BIC "+str(m.bic))
        for table in m.summary().tables:
            self.logfile.write(table.as_latex_tabular())
Author: clauwag, Project: WikipediaGenderInequality, Lines of code: 54, Source file: GoogleTrendAnalyzer.py

Example 3: generate_regression_models

def generate_regression_models(df):
    # Use the glm function from statsmodels.formula.api to create the regression models (the family defaults to Gaussian when not specified)
    heart_deaths = sm.glm(formula="Heart_Disease_Deaths ~ Obesity + Binge_Drinking + Smoking + Primary_Care + No_Insurance + Median_Household_Income + College_Degrees + Long_Term_Care_Hospital_Admissions + Unemployed_Persons + Liquor_Stores", data=df).fit()
    cancer_deaths = sm.glm(formula="Cancer_Deaths ~ Obesity + Binge_Drinking + Smoking + Primary_Care + No_Insurance + Median_Household_Income + College_Degrees + Long_Term_Care_Hospital_Admissions + Unemployed_Persons + Liquor_Stores", data=df).fit()
    diabetes_deaths = sm.glm(formula="Diabetes_Deaths ~ Obesity + Smoking + Binge_Drinking + Primary_Care + No_Insurance + Median_Household_Income + College_Degrees + Long_Term_Care_Hospital_Admissions + Unemployed_Persons + Liquor_Stores", data=df).fit()
    resp_deaths = sm.glm(formula="Respiratory_Disease_Deaths ~ Obesity + Smoking + Binge_Drinking + Primary_Care + No_Insurance + Median_Household_Income + College_Degrees + Long_Term_Care_Hospital_Admissions + Unemployed_Persons + Liquor_Stores", data=df).fit()
   
    # Appending the different models to a list
    models = []
    models.append(heart_deaths)
    models.append(cancer_deaths)
    models.append(resp_deaths)
    models.append(diabetes_deaths)
    return models
Author: akshat-verma, Project: ChronicDiseaseIndicators, Lines of code: 14, Source file: analysis.py

Example 4: multiple_linear_regression

def multiple_linear_regression():
    '''Multiple linear regression
    chapter 6.3, p. 98'''
    
    # get the data from the web
    inFile = r'GLM_data/Table 6.3 Carbohydrate diet.xls'
    df = get_data(inFile)
    
    # do the fit, for the original model ...
    model = ols('carbohydrate ~ age + weight + protein', data=df).fit()
    print model.summary()
    print anova_lm(model)

    # as GLM
    glm_model = glm('carbohydrate ~ age + weight + protein',
            family=Gaussian(), data=df).fit()
    print 'Same model, calculated with GLM'
    ''' The confidence intervals are different than those from OLS.
    The reason (from Nathaniel Smith):
    OLS uses a method that gives exact results, but only works in the special
    case where all the usual OLS criteria apply - iid Gaussian noise etc. GLM
    instead uses an approximate method which is correct asymptotically but may
    be off for small samples; the tradeoff you get in return is that this method
    works the same way for all GLM models, including those with non-Gaussian
    error terms and non-trivial link functions. So that's why they're different.
    '''

    print glm_model.summary()
    
    # ... and for model 1
    model1 = ols('carbohydrate ~ weight + protein', data=df).fit()
    print model1.summary()
    print anova_lm(model1)    
Author: HunterAllman, Project: kod, Lines of code: 33, Source file: code.py
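
The docstring in Example 4 quotes Nathaniel Smith's explanation of why ols and a Gaussian-family glm agree on the coefficients but report different confidence intervals (exact t-based intervals versus asymptotic normal ones). The following synthetic-data sketch, with hypothetical variable names, makes that comparison easy to reproduce.

import numpy as np
import pandas as pd
from statsmodels.formula.api import glm, ols
from statsmodels.genmod.families import Gaussian

rng = np.random.default_rng(0)
df = pd.DataFrame({"x1": rng.normal(size=20), "x2": rng.normal(size=20)})
df["y"] = 1.0 + 2.0 * df["x1"] - 0.5 * df["x2"] + rng.normal(scale=0.3, size=20)

ols_fit = ols("y ~ x1 + x2", data=df).fit()
glm_fit = glm("y ~ x1 + x2", data=df, family=Gaussian()).fit()

print(ols_fit.params)       # point estimates agree between the two fits
print(glm_fit.params)
print(ols_fit.conf_int())   # exact, t-distribution based
print(glm_fit.conf_int())   # asymptotic normal, slightly narrower on small samples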

Example 5: logistic_regression

 def logistic_regression(self, use_glm=True):
     """
     (b) it seems the statistical significant predict variable is only Lag2. How disappointing...
     """
     formula = "Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume"
     model = (
         smf.glm(formula, data=self.df, family=sm.families.Binomial())
         if use_glm
         else smf.logit(formula, data=self.transformedDF)
     )
     result = model.fit()
     if use_glm:
         probs = result.fittedvalues
         """Beware the prob here is the index 0's prob, so we should use the lambda function below"""
         pred_values = probs.map(lambda x: 0 if x > 0.5 else 1)
     else:
         """The probability of being 1"""
         probs = Series(result.predict(sm.add_constant(self.df[["Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume"]])))
         pred_values = probs.map(lambda x: 1 if x > 0.5 else 0)
     """
     (c) Percentage of correct predictions: (54+557)/(54+557+48+430) = 56.1%.
         Weeks the market goes up, the logistic regression is right most of the time: 557/(557+48) = 92.1%.
         Weeks the market goes down, it is right only 54/(430+54) = 11.2% of the time.
     """
     tp.output_table(pred_values.values, self.transformedDF[self.y_col].values)
Author: Aran00, Project: ISIRExerciseCode, Lines of code: 25, Source file: Exec10.py

Example 6: test_all_methods

    def test_all_methods(self):
        x_cols = ["Lag2"]
        formula = "Direction~Lag2"
        # print self.df.shape[0]
        train_data = self.df.ix[(self.df["Year"] >= 1990) & (self.df["Year"] <= 2008), :]
        # print train_data.shape[0]
        """ (d) logistic"""
        model = smf.glm(formula, data=train_data, family=sm.families.Binomial())
        result = model.fit()
        test_data = self.df.ix[self.df["Year"] > 2008, :]
        probs = Series(result.predict(sm.add_constant(test_data[["Lag2"]])))
        pred_values = probs.map(lambda x: "Down" if x > 0.5 else "Up")
        tp.output_table(pred_values.values, test_data[self.y_col].values)

        train_X = train_data[x_cols].values
        train_y = train_data[self.y_col].values
        test_X = test_data[x_cols].values
        test_y = test_data[self.y_col].values
        """ (e) LDA """
        lda_res = LDA().fit(train_X, train_y)
        pred_y = lda_res.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (f) QDA """
        qda_res = QDA().fit(train_X, train_y)
        pred_y = qda_res.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (g) KNN """
        clf = neighbors.KNeighborsClassifier(1, weights="uniform")
        clf.fit(train_X, train_y)
        pred_y = clf.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (h) logistic and LDA """
        """ (i) Is the purpose of the last question going through all methods with no direction?"""
Author: Aran00, Project: ISIRExerciseCode, Lines of code: 33, Source file: Exec10.py

Example 7: fit_model

    def fit_model(self,df, filters, model_expression):
        """
        Use statsmodels GLM to construct a model relation.

        Parameters
        ----------
        df : pandas.DataFrame
            Data to use for fit. Should contain all the columns
            referenced in the `model_expression`.
        filters : list of str
            Any filters to apply before doing the model fit.
        model_expression : str
            A patsy model expression that can be used with statsmodels.
            Should contain both the left- and right-hand sides.

        Returns
        -------
        fit : statsmodels.genmod.generalized_linear_model.GLMResults
        """
        df = util.apply_filter_query(df, filters)

        model=smf.glm(formula=model_expression, data=df, family=sm.families.Poisson())


        if len(model.exog) != len(df):
            raise ModelEvaluationError(
                'Estimated data does not have the same length as input.  '
                'This suggests there are null values in one or more of '
                'the input columns.')

        with log_start_finish('statsmodels GLM fit', logger):
            return model.fit()
Author: xgitiaux, Project: urbansim, Lines of code: 32, Source file: regression.py
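
Example 7 builds a Poisson-family GLM from a patsy model expression and raises if patsy dropped any rows, which happens silently when the input contains nulls. Here is a self-contained sketch of that pattern outside the urbansim class, with hypothetical column names.

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

df = pd.DataFrame({"trips": [2, 0, 5, 3, 1, 4],
                   "households": [10, 3, 20, 15, 6, 18]})

model = smf.glm("trips ~ households", data=df, family=sm.families.Poisson())
# patsy drops rows containing NaN; compare the design matrix length to the input
if len(model.exog) != len(df):
    raise ValueError("rows were dropped - check the input columns for nulls")
print(model.fit().summary())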

Example 8: fit_with_logistic

 def fit_with_logistic(self, threshold=0.5):
     formula = "%s~%s" % (self.y_col, "+".join(self.x_cols))
     model = smf.glm(formula, data=self.train_set, family=sm.families.Binomial())
     result = model.fit()
     predict_probs = result.predict(exog=self.test_set)
     real_values = self.test_set[self.y_col].map(lambda x: 1 if x == 'No' else 0)
     tp.output_table_with_prob(predict_probs, real_values, threshold=threshold, zero_one_col_texts=["Yes", "No"])
Author: Aran00, Project: ISIRBookCode, Lines of code: 7, Source file: CaravanTest.py

Example 9: logistic_regression

def logistic_regression():
    '''Logistic regression example
    chapter 7.3, p 130
    [tbd]: the cloglog values are inconsistent with those mentioned in the book.
    This is probably due to the specific definitions of "loglog" and "cloglog"
    in the respective languages.
    '''
    
    inFile = r'GLM_data/Table 7.2 Beetle mortality.xls'
    df = get_data(inFile)
    
    # adjust the unusual column names in the Excel file
    colNames = [name.split(',')[1].lstrip() for name in df.columns.values]
    df.columns = colNames
    
    # fit the model
    df['tested'] = df['n']
    df['killed'] = df['y']
    df['survived'] = df['tested'] - df['killed']
    model = glm('survived + killed ~ x', data=df, family=Binomial()).fit()
    print model.summary()
    
    print '-'*65
    print 'Equivalent solution:'
    
    model = glm('I(n - y) + y ~ x', data=df, family=Binomial()).fit()
    print model.summary()    
    
    # The fitted number of survivors can be obtained by
    fits = df['n']*(1-model.fittedvalues)
    print 'Fits Logit:'
    print fits
    
    # The fits for other link functions are:
    model_probit = glm('I(n - y) + y ~ x', data=df, family=Binomial(links.probit)).fit()
    print model_probit.summary()
    
    fits_probit = df['n']*(1-model_probit.fittedvalues)
    print 'Fits Probit:'
    print fits_probit
    
    model_cll = glm('I(n - y) + y ~ x', data=df, family=Binomial(links.cloglog)).fit()
    print model_cll.summary()
    fits_cll = df['n']*(1-model_cll.fittedvalues)
    print 'Fits Extreme Value:'
    print fits_cll
Author: HunterAllman, Project: kod, Lines of code: 46, Source file: code.py
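
Example 9 relies on the grouped-response Binomial syntax, where the left-hand side of the formula is written as two counts ('survived + killed ~ x'). The sketch below uses made-up dose-response counts and assumes, as the statsmodels documentation describes, that the first term is treated as the success count, so fittedvalues are the fitted success proportions per group.

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import glm

# made-up dose-response counts, purely illustrative
df = pd.DataFrame({"dose":   [1.0, 1.2, 1.4, 1.6, 1.8, 2.0],
                   "n":      [50, 50, 50, 50, 50, 50],
                   "killed": [4, 10, 19, 30, 41, 47]})
df["survived"] = df["n"] - df["killed"]

# left-hand side "successes + failures": survival is modelled as the success here
fit = glm("survived + killed ~ dose", data=df, family=sm.families.Binomial()).fit()
print(fit.summary())
print(df["n"] * (1 - fit.fittedvalues))   # fitted number killed in each group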

Example 10: pred

def pred(working, rating):
	data = working[working['prosper_rating']==rating]
	#https://stackoverflow.com/questions/38250710/how-to-split-data-into-3-sets-train-validation-and-test
	#60%, 20% 20% for traing, test and validation
	train, validation, test = np.split(data.sample(frac=1), [int(.6*len(data)), int(.8*len(data))])
	print("total:{} train:{} test:{} validation:{}".format(len(data), len(train), len(validation), len(test)))
	mod = smf.glm('status ~ borrower_rate', data=train, family=sm.families.Binomial()).fit()

	print(test_model(mod, test))
Author: jcongithub, Project: prosper, Lines of code: 9, Source file: loan.py

Example 11: report_glm

def report_glm(formula, data, verbose=True, **kwargs):
    """Fit GLM, print a report, and return the fit object."""
    results = smf.glm(formula, data=data, **kwargs).fit(disp=False, **kwargs)
    summary = results.summary()

    if verbose:
        report = """\n{summary}\n""".format(summary=summary)
        print(report)

    return results
Author: xguse, Project: crunchers, Lines of code: 10, Source file: lazy_stats.py
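
A hypothetical call to the report_glm helper above (the data and column names are made up). Extra keyword arguments are forwarded through **kwargs to smf.glm, so a non-default family can be requested; note that the same kwargs are also handed to .fit(), which this sketch assumes will tolerate them.

import pandas as pd
import statsmodels.api as sm

df = pd.DataFrame({"events":   [0, 2, 1, 4, 3, 6],
                   "exposure": [1.0, 2.0, 1.5, 3.0, 2.5, 4.0]})

fit_gauss = report_glm("events ~ exposure", df)    # Gaussian family by default
fit_pois = report_glm("events ~ exposure", df,     # assumes fit() ignores the extra kwarg
                      family=sm.families.Poisson())
print(fit_pois.params)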

Example 12: test_logit

 def test_logit(self):
     from statsmodels.formula.api import glm
     from statsmodels.genmod.families import Binomial
     
     inData = C13_2_logit.getData()
     dfFit = C13_2_logit.prepareForFit(inData)
     model = glm('ok + failed ~ temp', data=dfFit, family=Binomial()).fit()
     C13_2_logit.showResults(inData, model)
     
     self.assertAlmostEqual(model.params.Intercept, -15.042902, places=5)
Author: ejmurray, Project: statsintro_python, Lines of code: 10, Source file: test_CLprgs.py

Example 13: regression

def regression():
    '''Poisson regression example
    chapter 4.4, p.69'''
    
    # get the data from the web
    inFile = r'GLM_data/Table 4.3 Poisson regression.xls'
    df = get_data(inFile)
    
    # do the fit
    p = glm('y~x', family=Poisson(links.identity), data=df)
    print p.fit().summary()    
Author: HunterAllman, Project: kod, Lines of code: 11, Source file: code.py

Example 14: calculate_odds_ratio

def calculate_odds_ratio(genotypes, phen_vector1,phen_vector2,reg_type,covariates,response='',phen_vector3=''): #diff - done
	"""
	Runs the regression for a specific phenotype vector relative to the genotype data and covariates.

	:param genotypes: a DataFrame containing the genotype information
	:param phen_vector: an array containing the phenotype vector
	:param covariates: a string containing all desired covariates
	:type genotypes: pandas DataFrame
	:type phen_vector: numpy array
	:type covariates: string

	.. note::
		The covariates must be a string that is delimited by '+', not a list.
		If you are using a list of covariates and would like to convert it to the pyPhewas format, use the following::

			l = ['genotype', 'age'] # a list of your covariates
			covariates = '+'.join(l) # pyPhewas format

		The covariates that are listed here *must* be headers to your genotype CSV file. 
	"""

	data = genotypes
	data['y']=phen_vector1
	data['MaxAgeAtICD'] = phen_vector2
	#f='y~'+covariates
	if response:
		f = response+'~ y + ' + covariates
		if phen_vector3.any():
			data['phe'] = phen_vector3
			f = response + '~ y + phe +' + covariates
	else:
		f = 'y ~' + covariates
		if phen_vector3.any():
			data['phe'] = phen_vector3
			f = 'y ~ phe +' + covariates
	try:
		if reg_type==0:
			logreg = smf.logit(f,data).fit(method='bfgs',disp=False)
			p=logreg.pvalues.genotype
			odds=logreg.deviance	
			conf = logreg.conf_int()
			od = [-math.log10(p), logreg.params.genotype, '[%s,%s]' % (conf[0]['genotype'],conf[1]['genotype'])]
		else:
			linreg = smf.glm(f,data).fit(method='bfgs',disp=False)
			p=linreg.pvalues.genotype
			odds=0
			conf = linreg.conf_int()
			od = [-math.log10(p), linreg.params.genotype, '[%s,%s]' % (conf[0]['genotype'],conf[1]['genotype'])]
	except:
		odds=0
		p=np.nan
		od = [np.nan,np.nan,np.nan]
	return (odds,p,od)
Author: BennettLandman, Project: pyPheWAS, Lines of code: 53, Source file: pyPhewasCore.py

Example 15: senility_and_WAIS

def senility_and_WAIS():
    '''Another example of logistic regression.
    chapter 7.8, p 143
    [tbd]: I don't understand how the "Binomial model" (grouped response)
    is supposed to work, in either language'''

    inFile = r'GLM_data/Table 7.8 Senility and WAIS.xls'
    df = get_data(inFile)
    
    # ungrouped
    model = glm('s ~ x', data=df, family=Binomial()).fit()
    print model.summary()    
Author: HunterAllman, Project: kod, Lines of code: 12, Source file: code.py


Note: The statsmodels.formula.api.glm function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult each project's License before distributing or using the code; do not reproduce this article without permission.