This article collects typical usage examples of the Python function statsmodels.api.add_constant. If you have been wondering exactly how add_constant works, or are looking for real-world examples of it in use, the curated code samples here may help.
Below are 15 code examples of the add_constant function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
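Before diving in, a minimal sketch of what add_constant does: it adds an intercept column of ones to a design matrix (prepended as the first column by default), since statsmodels estimators such as OLS do not include an intercept automatically.

import numpy as np
import statsmodels.api as sm

X = np.arange(5.0)
sm.add_constant(X)                   # ones column prepended: shape (5, 2)
sm.add_constant(X, prepend=False)    # ones column appended instead
# has_constant='skip' (the default) leaves data that already contains a
# constant column unchanged; 'add' appends one anyway; 'raise' raises.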
Example 1: inbound_forcast
def inbound_forcast(target, exchange, geo, exchange_test, geo_test, submit, i):
    # col_list, suff and newwave_i are defined elsewhere in the source module
    for col in col_list:
        # Select the column holding the number of overnight guests
        target_col = col + suff
        target.index = range(0, 365)
        X = sm.add_constant(exchange, prepend=False)
        X_test = sm.add_constant(exchange_test, prepend=False)
        X.index = range(0, 365)
        # Replace zero counts with 1 so the log transform below is defined
        for g in range(0, len(target)):
            if target[target_col][g] == 0:
                target.loc[g, target_col] = 1
        y = target[target_col].apply(np.log)
        model = sm.OLS(y, X)
        results = model.fit()
        print(results.summary())
        pred = results.predict()
        # Fit a seasonal wave to the OLS residuals via FFT
        Y = y - pred
        L = len(Y)
        fftY = fft.fft(Y)
        freqs = fft.fftfreq(L)
        power = np.abs(fftY)
        phase = [np.arctan2(float(c.imag), float(c.real)) for c in fftY]
        wave = newwave_i(L, results, pred, power, freqs, phase, X_test)
        submit[i] = wave
        i += 1
    return submit, i
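Because this example passes prepend=False, the intercept ends up as the last column of X, so in results.params the constant is the final coefficient rather than the first.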
Example 2: error_rate_for_model
def error_rate_for_model(test_model, train_set, test_set, infer=False,
                         infer_set=None):
    """Report error rate on test_doc sentiments, using
    supplied model and train_docs"""
    train_targets, train_regressors = \
        zip(*[(doc.sentiment, test_model.docvecs[doc.tags[0]])
              for doc in train_set])
    train_regressors = sm.add_constant(train_regressors)
    predictor = logistic_predictor_from_data(train_targets, train_regressors)
    test_data = test_set
    if infer:
        #if infer_subsample < 1.0:
        #    test_data = sample(test_data,
        #                       int(infer_subsample * len(test_data)))
        #test_regressors = [test_model.infer_vector(doc.words,
        #                       steps=infer_steps, alpha=infer_alpha)
        #                   for doc in test_data]
        test_data = [SentimentDocument(None, None, None, s)
                     for (v, s) in infer_set]
        test_regressors = [v for (v, s) in infer_set]
    else:
        test_regressors = [test_model.docvecs[doc.tags[0]]
                           for doc in test_set]
    test_regressors = sm.add_constant(test_regressors)
    # predict & evaluate
    test_predictions = predictor.predict(test_regressors)
    corrects = sum(np.rint(test_predictions) ==
                   [doc.sentiment for doc in test_data])
    errors = len(test_predictions) - corrects
    error_rate = float(errors) / len(test_predictions)
    return (error_rate, errors, len(test_predictions), predictor)
Example 3: __init__
def __init__(self):
    self.researched = util.source.read('F-F_Research_Data_5_Factors_2x3')
    self.portfolios = util.source.read('25_Portfolios_5x5')
    self.simpleFactor = sm.add_constant(self.researched.Mkt_RF)
    self.threeFactor = sm.add_constant(self.researched[['Mkt_RF', 'SMB', 'HML']])
    self.fourFactor = sm.add_constant(self.researched[['Mkt_RF', 'SMB', 'RMW', 'CMA']])
    self.fiveFactor = sm.add_constant(self.researched[['Mkt_RF', 'SMB', 'HML', 'RMW', 'CMA']])
Example 4: linear_model_plot
def linear_model_plot(x_variable, y_variable):
    '''Develops a linear model for the x and y variable inputs and plots the regression line on top of a scatter plot'''
    assert len(x_variable) > 1, 'length of x_variable should be larger than 1'
    assert len(y_variable) > 1, 'length of y_variable should be larger than 1'
    # assigning function variables to response and predictor variables
    y = y_variable  # response variable
    X = x_variable  # predictor variable
    X = sm.add_constant(X)  # adds a constant term to the predictor (essential to obtain the intercept)
    # Calculating the linear model for the two variables
    lm = sm.OLS(y, X).fit()
    # Developing the plot of the linear model
    # making a range of the x variable to pass to the y prediction
    x_pred = np.linspace(x_variable.min(), x_variable.max())
    # Adding a constant to this range of x values (essential to obtain the intercept)
    x_pred2 = sm.add_constant(x_pred)
    # Passing the linear model predictor the range of x values to model over
    y_pred = lm.predict(x_pred2)
    # Plotting these predictions on the graph
    plt.plot(x_pred, y_pred, color='k', linewidth=2)
    # Obtaining linear regression
    return plt.plot()
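A minimal usage sketch for this function (hypothetical data; pandas Series inputs are assumed because the function calls .min() and .max() on x_variable):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

x = pd.Series(np.random.uniform(0, 10, 50))
y = 2.0 * x + 1.0 + np.random.normal(0, 1, 50)
plt.scatter(x, y)        # the scatter the regression line is drawn on top of
linear_model_plot(x, y)
plt.show()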
Example 5: predict
def predict(self, test_X):
    dataset = self.__dataset
    intercept = self.__intercept
    XX_inv = self.__XX_inv
    beta = self.__beta
    # vec() is a reshaping helper defined elsewhere in the source module
    train_X = sm.add_constant(dataset[:, :-1]) if intercept else dataset[:, :-1]
    test_X = sm.add_constant(vec(test_X)) if intercept else vec(test_X)
    train_Y = dataset[:, -1:]
    train_pred = np.dot(train_X, beta)
    # Confidence interval
    sig = (np.linalg.norm(train_Y - train_pred)**2 / (train_X.shape[0] - train_X.shape[1] + 1))**0.5
    s = []
    for row in range(test_X.shape[0]):
        x = test_X[[row], :]
        s.append(sig * (1 + np.dot(np.dot(x, XX_inv), x.T))**0.5)
    s = np.reshape(np.asarray(s), (test_X.shape[0], 1))
    test_pred = np.dot(test_X, beta)
    hi_ci = test_pred + 2*s
    lo_ci = test_pred - 2*s
    return test_pred, hi_ci, lo_ci
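Here sig estimates the residual standard deviation, and each s is the standard error of a new observation at x (the 1 + x'(X'X)⁻¹x term), so test_pred ± 2s is roughly a 95% prediction interval under Gaussian errors.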
Example 6: overfit_stocks
def overfit_stocks():
    # Load one year's worth of pricing data for five different assets
    start = datetime.datetime(2013, 1, 1)
    end = datetime.datetime(2014, 1, 1)
    x1 = get_pricing('PEP', fields='price', start_date=start, end_date=end)
    x2 = get_pricing('MCD', fields='price', start_date=start, end_date=end)
    x3 = get_pricing('ATHN', fields='price', start_date=start, end_date=end)
    x4 = get_pricing('DOW', fields='price', start_date=start, end_date=end)
    y = get_pricing('PG', fields='price', start_date=start, end_date=end)

    # Build a linear model using only x1 to explain y
    slr = regression.linear_model.OLS(y, sm.add_constant(x1)).fit()
    slr_prediction = slr.params[0] + slr.params[1]*x1

    # Run multiple linear regression using x1, x2, x3, x4 to explain y
    mlr = regression.linear_model.OLS(y, sm.add_constant(np.column_stack((x1, x2, x3, x4)))).fit()
    mlr_prediction = mlr.params[0] + mlr.params[1]*x1 + mlr.params[2]*x2 + mlr.params[3]*x3 + mlr.params[4]*x4

    # Compute adjusted R-squared for the two different models
    print('SLR R-squared: %.5f' % slr.rsquared_adj)
    print('SLR p-value: %.5f' % slr.f_pvalue)
    print('MLR R-squared: %.5f' % mlr.rsquared_adj)
    print('MLR p-value: %.5f' % mlr.f_pvalue)

    # Plot y along with the two different predictions
    y.plot()
    slr_prediction.plot()
    mlr_prediction.plot()
    plt.ylabel('Price')
    plt.xlabel('Date')
    plt.legend(['PG', 'SLR', 'MLR'])
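The comparison is the point of this example: plain R² can only increase as regressors are added, so it reports adjusted R², which penalizes the extra parameters; a higher in-sample fit for the MLR model may still reflect overfitting rather than genuine predictive power. (get_pricing here is Quantopian's research API.)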
Example 7: GetCoef
def GetCoef(start_train, end_train, StockReturns, CarhartDaily, SP500Returns, DataFolder):
    coefFile = r'%s\Coef_%s_%s.csv' % (DataFolder, start_train.date(), end_train.date())
    if os.path.isfile(coefFile):
        Coef = pd.read_csv(coefFile)
        return Coef
    else:
        Coef = pd.DataFrame()
        for ticker in StockReturns.ticker.unique():
            print("Getting regression coefficient for %s" % ticker)
            tmpReturn = StockReturns[(StockReturns.ticker == ticker)]
            if not tmpReturn.empty:
                tmpData = tmpReturn.merge(CarhartDaily, left_on='endDate', right_on='date')
                tmpData = tmpData.merge(SP500Returns, on='endDate')
                tmpData['SP500-RF'] = tmpData['SP500Return']*100 - tmpData['RF']
                y = tmpData['return']*100 - tmpData['RF']
                # Carhart four-factor, CAPM and S&P 500 single-factor regressions
                X1 = tmpData[['Mkt-RF', 'SMB', 'HML', 'UMD']]
                X2 = tmpData[['Mkt-RF']]
                X3 = tmpData[['SP500-RF']]
                X1 = sm.add_constant(X1)
                X2 = sm.add_constant(X2)
                X3 = sm.add_constant(X3)
                model1 = sm.OLS(y, X1).fit()
                model2 = sm.OLS(y, X2).fit()
                model3 = sm.OLS(y, X3).fit()
                tmpDF1 = pd.DataFrame(model1.params).T
                tmpDF1.rename(columns={'const': 'alphaFF'}, inplace=True)
                tmpDF2 = pd.DataFrame(model2.params).T
                tmpDF2.rename(columns={'const': 'alphaCAPM', 'Mkt-RF': 'Mkt-RF_only'}, inplace=True)
                tmpDF3 = pd.DataFrame(model3.params).T
                tmpDF3.rename(columns={'const': 'alphaSP500'}, inplace=True)
                tmpDF = pd.concat((tmpDF1, tmpDF2, tmpDF3), axis=1)
                tmpDF['ticker'] = ticker
                # DataFrame.append was removed in pandas 2.0; concat is the equivalent
                Coef = pd.concat((Coef, tmpDF))
        Coef.to_csv(coefFile, index=False)
        print('Finished saving regression coefficients to: %s' % coefFile)
        return Coef
Example 8: regression
def regression(json_data, bandwidth):
    latency = []
    rtt_by_size = []
    # RTT object:
    # rtt = {[avg_rtt1]: [rtt1, rtt2, rtt3, ..., rttx],
    #        [avg_rtt2]: [...]}
    for i in range(0, len(json_data)):
        latency.append(json_data[i]["latency"])
        rtt_by_size.append(json_data[i]["size"] * json_data[i]["rtt"])
    y = np.array(bandwidth).astype(float)
    z = np.array(latency).astype(float)
    r = np.array(rtt_by_size).astype(float)
    data = np.array([rtt_by_size, y])
    # Build the design matrix column by column; latency (z) is the response
    ones = np.ones(len(data[0]))
    X = sm.add_constant(np.column_stack((data[0], ones)))
    for ele in data[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    results = sm.OLS(z, X).fit()
    print(results.summary())
Example 9: reg_m
def reg_m(y, x):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    results = sm.OLS(y, X).fit()
    return results
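A subtlety in reg_m: a ones column is stacked in manually, and because add_constant defaults to has_constant='skip', it detects the existing constant and adds nothing, so each call only wraps the growing matrix. A small sketch of that default behavior:

import numpy as np
import statsmodels.api as sm

ones = np.ones(3)
X = np.column_stack((np.arange(3.0), ones))
print(sm.add_constant(X).shape)                      # (3, 2): unchanged, constant detected
print(sm.add_constant(X, has_constant='add').shape)  # (3, 3): a second ones column is forced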
Example 10: test_plot_influence
def test_plot_influence(self, close_figures):
    infl = self.res.get_influence()
    fig = influence_plot(self.res)
    assert_equal(isinstance(fig, plt.Figure), True)
    # test that we have the correct criterion for sizes #3103
    try:
        sizes = fig.axes[0].get_children()[0]._sizes
        ex = sm.add_constant(infl.cooks_distance[0])
        ssr = sm.OLS(sizes, ex).fit().ssr
        assert_array_less(ssr, 1e-12)
    except AttributeError:
        import warnings
        warnings.warn('test not compatible with matplotlib version')
    fig = influence_plot(self.res, criterion='DFFITS')
    assert_equal(isinstance(fig, plt.Figure), True)
    try:
        sizes = fig.axes[0].get_children()[0]._sizes
        ex = sm.add_constant(np.abs(infl.dffits[0]))
        ssr = sm.OLS(sizes, ex).fit().ssr
        assert_array_less(ssr, 1e-12)
    except AttributeError:
        pass
    assert_raises(ValueError, influence_plot, self.res, criterion='unknown')
Example 11: weak_instruments
def weak_instruments(self, n_sims=20):
    np.random.seed(1692)
    model = feedforward.FeedForwardModel(19, 1, dense_size=60, n_dense_layers=2)
    treatment_effects = []
    ols_betas, ols_ses = [], []
    old_corrs, new_corrs = [], []
    for _ in range(n_sims):
        df = self.treatment_gen.simulate_data(False)
        X = np.hstack((self.x, df['new_treat'].values[:, None]))
        Z = np.hstack((self.x, df['instrument'].values[:, None]))
        ols_beta, ols_se = self.fit_ols(df['treatment_effect'], X)
        ols_betas.append(ols_beta)
        ols_ses.append(ols_se)
        old_corr = df[['instrument', 'new_treat']].corr().values[0, 1]
        new_instrument, new_corr = model.fit_instruments(X, Z, df['treatment_effect'].values, batchsize=128)
        new_corrs.append(new_corr)
        old_corrs.append(old_corr)
        # Swap the learned instrument into Z and build the 2SLS model
        Z2 = Z.copy()
        Z2[:, -1] = new_instrument[:, 0]
        iv = IV2SLS(df['treatment_effect'].values.flatten(), add_constant(X), add_constant(Z2))
        model.reset_params()
        if new_corr:
            logger.info("Old corr: %.2f, New corr: %.2f", np.mean(old_corrs), np.mean(new_corrs))
    logger.info("Treatment effect (OLS): %.3f (%.4f)", np.mean(ols_betas), np.mean(ols_ses))
    logger.info("Treatment effect: %.3f (%.4f)", np.mean(treatment_effects), np.std(treatment_effects))
Example 12: get_z_LinearRegression
def get_z_LinearRegression(self, xo, yo):
    print('linear regression')
    dist_sigma = 1000
    xx = self.Knots[:, 0]
    yy = self.Knots[:, 1]
    dd = np.sqrt((xx - xo)**2 + (yy - yo)**2)
    print("dd", dd)
    exponent = -(dd**2) / (2 * (dist_sigma**2))
    print("exponent", exponent)
    weights = np.exp(exponent)
    print("weights", weights)
    X = self.Knots[:, 0:2]
    X = sm.add_constant(X)
    y = self.Knots[:, 2]
    mod_wls = sm.WLS(y, X, weights=weights)
    res_wls = mod_wls.fit()
    print(res_wls.summary())
    # Predict at the query point (duplicated in both rows of p)
    p = np.zeros((2, 2), dtype=X.dtype)
    p[0, 0] = xo
    p[0, 1] = yo
    p[1, 0] = xo
    p[1, 1] = yo
    p = sm.add_constant(p)
    z = res_wls.predict(p)
    print("z", z)
    return z[0]
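This is effectively locally weighted (kernel) regression: every query point (xo, yo) refits a plane through the knots with Gaussian weights exp(-d²/2σ²), with dist_sigma acting as the kernel bandwidth.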
Example 13: make_g_model
def make_g_model(daily_results, daily_projections):
    daily_results_common = unify_dfs(daily_results)
    dfm = create_master(daily_results_common)
    dfm['NF'] = dfm['NF'].astype(float)
    dfm = eliminate_zeros(dfm)
    X = pd.get_dummies(dfm[['Salary', 'RG', 'NF', 'RW', 'POS', 'Depth']])
    X = pd.concat([X.drop('Depth', axis=1), pd.get_dummies(X['Depth'])], axis=1)
    if 'POS_' in X.columns:
        X.drop('POS_', axis=1, inplace=True)
    #if 3 in X.columns:
    #    X.drop(3, axis=1, inplace=True)
    print(X.columns)
    X = sm.add_constant(X)
    info = dfm[['Player', 'Date', 'Time']]
    y = dfm['FD']
    model = sm.OLS(y, X).fit()
    # Score today's projections with the fitted model (date_string is module-level)
    today = daily_projections[date_string]
    X = pd.get_dummies(today[['Salary', 'RG', 'NF', 'RW', 'POS', 'Depth']])
    X = pd.concat([X.drop('Depth', axis=1), pd.get_dummies(X['Depth'])], axis=1)
    print(X.columns)
    if 'POS_' in X.columns:
        X.drop('POS_', axis=1, inplace=True)
    X = sm.add_constant(X)
    g_model = model.predict(X)
    return g_model
Example 14: scatter
def scatter(filename, x, y, line=True, xr=None, yr=None, x_title='', y_title='', title=None):
    if title is None:
        title = filename
    plt.figure(figsize=(24, 18), dpi=600)
    plt.scatter(x, y)
    if xr is not None:
        plt.xlim(xr)
    if yr is not None:
        plt.ylim(yr)
    if line:
        est = sm.OLS(y, sm.add_constant(x)).fit()
        x_prime = np.linspace(min(x), max(x), 100)[:, np.newaxis]
        x_prime = sm.add_constant(x_prime)
        y_hat = est.predict(x_prime)
        line_plot1 = plt.plot(x_prime[:, 1], y_hat, 'r', alpha=0.9, label='r^2 = %s' % est.rsquared)
        #res = linregress(x, y)
        #line_plot2 = plt.plot([min(x), max(x)], [res[0]*min(x)+res[1], res[0]*max(x)+res[1]],
        #                      'g', alpha=0.9, label='r^2 = %s' % res[2])
        plt.legend(['r^2 = %s' % est.rsquared])
    plt.xlabel(x_title)
    plt.ylabel(y_title)
    plt.title(title)
    plt.savefig('%s.png' % filename, format='png')
    plt.savefig('%s.eps' % filename, format='eps')
    plt.close()
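A usage sketch with hypothetical data (the function writes both a PNG and an EPS named after filename):

import numpy as np

x = np.random.uniform(0, 100, 200)
y = 3.0 * x + np.random.normal(0, 20, 200)
scatter('x_vs_y', x, y, x_title='x', y_title='y', title='Noisy linear relation')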
Example 15: test_mvl_fuse_function
def test_mvl_fuse_function(self):
    Y, D, P, T, G = generate_raw_samples()
    T = sm.add_constant(T, prepend=False)
    P = sm.add_constant(P, prepend=False)
    D = sm.add_constant(D, prepend=False)
    G = sm.add_constant(G, prepend=False)
    # LeaveOneOut(n) is the legacy sklearn.cross_validation API
    loo = LeaveOneOut(len(Y))
    er = []
    for train_idx, test_idx in loo:
        tm = taxi_view_model(train_idx, Y, T)
        pm = poi_view_model(train_idx, Y, P)
        gm = geo_view_model(train_idx, Y, G)
        dm = demo_view_model(train_idx, Y, D)
        models = [tm, pm, gm, dm]
        lm = mvl_fuse_function(models, train_idx, Y)
        tm_test = tm[0].predict(T[test_idx])
        pm_test = pm[0].predict(P[test_idx])
        gm_test = gm[0].predict(G[test_idx])
        dm_test = dm[0].predict(D[test_idx])
        newX_test = np.array([1, tm_test, pm_test, gm_test, dm_test])
        ybar = lm.predict(newX_test)
        y_error = ybar - Y[test_idx]
        # if np.abs(y_error / Y[test_idx]) > 0.8:
        #     print(test_idx, ybar, Y[test_idx], newX_test)
        er.append(y_error)
    mre = np.mean(np.abs(er)) / np.mean(Y)
    print("MVL with linear fusion function MRE: {0}".format(mre))
    self.visualize_prediction_error(er, Y, "MVL linear combination")