本文整理汇总了Python中sklearn.linear_model.LassoCV.predict方法的典型用法代码示例。如果您正苦于以下问题：Python LassoCV.predict方法的具体用法？Python LassoCV.predict怎么用？Python LassoCV.predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.LassoCV的用法示例。
在下文中一共展示了LassoCV.predict方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: bagging
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def bagging(self, trains, tests, train_y, model_name=None):
    """Blend base-model predictions with a non-negative LassoCV.

    :param trains: array whose transpose is the training design matrix
        (``trains.T`` is passed to ``fit``/``predict``).
    :param tests: array whose transpose is the test design matrix.
    :param train_y: training targets for the blender.
    :param model_name: accepted but not used by this method.
    :return: tuple ``(train_predict, test_predict)`` from the fitted blender.
    """
    # NOTE(review): the `normalize` argument was removed from LassoCV in
    # recent scikit-learn releases -- confirm the version this runs against.
    blender = LassoCV(
        n_alphas=100,
        alphas=None,
        normalize=True,
        cv=5,
        fit_intercept=True,
        max_iter=10000,
        positive=True,
    )
    blender.fit(trains.T, train_y)
    # Same call order as before: test set first, then the training set.
    test_blend = blender.predict(tests.T)
    train_blend = blender.predict(trains.T)
    return train_blend, test_blend
示例2: lasso_cv
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def lasso_cv(x, y, x_pred=None, max_deg=3, cv=10, max_iter=1e3, return_model=False):
    """LASSO polynomial fit with cross-validation.

    Regularized polynomial regression (by penalized least-squares) using the
    polynomial-contrast columns 1..max_deg of a ``C(..., Poly)`` design
    matrix. The LASSO regression minimises MSE and penalizes the size of the
    parameter vector using the L1-norm, which leads to fewer coefficients in
    the fitted model.

    - The 'alpha' parameter (amount of penalization) is selected by k-fold CV.
    - Predicts the fitted model on 'x_pred' (defaults to 'x').
    - Rows where 'x' or 'y' is NaN are excluded from the fit.

    Args:
        x: 1-D numpy array of sample positions (may contain NaN).
        y: 1-D numpy array of observations (may contain NaN).
        x_pred: values to evaluate the fitted model on; None means 'x'.
        max_deg: highest design-matrix column used (column 0 is skipped).
        cv: forwarded to ``LassoCV(cv=...)``.
        max_iter: iteration cap forwarded to ``LassoCV``; floats such as the
            default ``1e3`` are accepted and converted to int.
        return_model: if True, return ``[y_pred, fitted_model]``.

    Returns:
        The predictions, or ``[predictions, model]`` when 'return_model'.
    """
    ind, = np.where((~np.isnan(x)) & (~np.isnan(y)))
    # NOTE: the formula strings below refer to the local names `x_`, `x` and
    # `x_pred` (presumably resolved by patsy's dmatrix from this scope), so
    # these variables must keep their names.
    x_, y_ = x[ind], y[ind]
    X_ = dmatrix('C(x_, Poly)')
    if x_pred is None:
        X = dmatrix('C(x, Poly)')       # predict on original values
    else:
        X = dmatrix('C(x_pred, Poly)')  # predict on given values
    # LassoCV expects an integer iteration count; the signature default is a
    # float (1e3), so convert before handing it over.
    lasso = LassoCV(cv=cv, copy_X=True, normalize=True, max_iter=int(max_iter))
    lasso = lasso.fit(X_[:, 1:max_deg + 1], y_)
    y_pred = lasso.predict(X[:, 1:max_deg + 1])
    if return_model:
        y_pred = [y_pred, lasso]
    return y_pred
示例3: lassoCV_regression
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def lassoCV_regression(data, target, alphas):
    """Select at most two features with LassoCV and report 10-fold RMSE.

    The selection threshold starts at 0.25 and is raised in steps of 0.1
    until no more than two features survive. A LassoCV model is then fitted
    on each fold of the reduced data, the per-fold RMSE values are plotted
    and returned.

    :param data: feature matrix; rows are indexed with the fold indices.
    :param target: target vector; indexed with the fold indices.
    :param alphas: accepted but not used by this function.
    :return: list with one RMSE value per fold.
    """
    clf = LassoCV()
    sfm = SelectFromModel(clf, threshold=0.25)
    sfm.fit(data, target)
    # Transform once up front so `data_transform` exists even when the
    # initial threshold already leaves two or fewer features and the loop
    # below never runs.
    data_transform = sfm.transform(data)
    n_features = data_transform.shape[1]
    while n_features > 2:
        sfm.threshold += 0.1
        data_transform = sfm.transform(data)
        n_features = data_transform.shape[1]

    rmses = []
    # NOTE(review): positional KFold(n, n_folds, shuffle, random_state) looks
    # like the legacy sklearn.cross_validation signature -- confirm.
    kf = KFold(len(target), 10, True, None)
    for train_index, test_index in kf:
        data_train, data_test = data_transform[train_index], data_transform[test_index]
        target_train, target_test = target[train_index], target[test_index]
        clf.fit(data_train, target_train)
        rmse = sqrt(np.mean((clf.predict(data_test) - target_test) ** 2))
        rmses.append(rmse)

    # One x position per fold actually evaluated (1..10 for the 10 folds).
    x0 = np.arange(1, len(rmses) + 1)
    plt.figure()
    plt.plot(x0, rmses, label='LassoCV')
    plt.legend()
    plt.show()
    return rmses
示例4: remove_foreground_glm
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def remove_foreground_glm(
        x, y,
        spatial_mask=None, spectral_mask=None,
        alphas=None, l1_ratio=1.):
    """Fit a cross-validated linear model of ``y`` on the planes of ``x``.

    ``x`` is reshaped to a 2-D design matrix with one column per entry along
    its first axis and one row per remaining element; ``y`` is flattened to
    the matching 1-D target. The estimator depends on ``l1_ratio``:
    ``1.`` -> LassoCV, ``0.`` -> RidgeCV, anything else -> ElasticNetCV.

    Args:
        x (numpy.ndarray): regressors; first axis indexes the regressors.
        y (numpy.ndarray): target; ``y.size`` must equal the number of rows
            of the reshaped ``x``.
        spatial_mask (numpy.ndarray, optional): selects the samples (rows)
            used for fitting; flattened before use. Defaults to all samples.
            Assumed to be boolean -- TODO confirm with callers.
        spectral_mask (numpy.ndarray, optional): selects the regressors
            (columns) used; defaults to all of them.
        alphas (scalar or array-like, optional): regularisation strengths to
            try; ``None`` lets each estimator use its own default grid.
        l1_ratio (float, optional): picks the estimator as described above
            and is forwarded to ElasticNetCV in the mixed case.

    Returns:
        tuple: ``(y_model, reg, glm_coeffs)`` -- the model prediction for
        every sample reshaped to ``y.shape``, the fitted estimator, and a
        float32 vector with one coefficient per regressor (zero for
        regressors excluded by ``spectral_mask``).
    """
    # cast to double and reshape
    x_rs = np.float64(x.reshape((x.shape[0], -1))).T
    y_rs = np.float64(y.flatten())
    if spatial_mask is None:
        spatial_mask_rs = np.ones_like(y_rs, dtype=bool)
    else:
        spatial_mask_rs = spatial_mask.flatten()
    if spectral_mask is None:
        spectral_mask = np.ones(x_rs.shape[1], dtype=bool)
    if alphas is not None:
        alphas = np.atleast_1d(alphas)

    # fit GLM
    if l1_ratio == 1.:
        reg = LassoCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            max_iter=5000
        )
    elif l1_ratio == 0.:
        # Unlike LassoCV/ElasticNetCV, RidgeCV does not treat alphas=None as
        # "pick a grid automatically"; omit the argument so that its own
        # default grid is used when the caller gave no alphas.
        ridge_kwargs = {} if alphas is None else {'alphas': alphas}
        reg = RidgeCV(**ridge_kwargs)
    else:
        reg = ElasticNetCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            l1_ratio=l1_ratio
        )
    reg.fit(x_rs[spatial_mask_rs][:, spectral_mask], y_rs[spatial_mask_rs])
    # Predict for every sample, including those masked out of the fit.
    y_model = reg.predict(x_rs[:, spectral_mask]).reshape(y.shape)
    glm_coeffs = np.zeros(x_rs.shape[1], dtype=np.float32)
    glm_coeffs[spectral_mask] += reg.coef_
    return y_model, reg, glm_coeffs
示例5: predict
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def predict(self, trains_x, train_y, tests_x, parameters, times=10, isFile=True, foldername="blend-dir"):
    """Ensemble many feature sets/regressors and blend them with LassoCV.

    For every entry of ``parameters`` a model is fitted per fold on the
    log1p-transformed targets; its expm1-transformed predictions fill one
    column of the blending matrices. Fold indices and per-fold predictions
    are cached as pickle files inside ``foldername`` and reused when present.

    :param trains_x: dict mapping a data key to a training feature matrix.
    :param train_y: training target vector.
    :param tests_x: dict mapping a data key to a test feature matrix.
    :param parameters: sequence of dicts with the keys ``'data'`` (key into
        ``trains_x``/``tests_x``) and ``'parameter'`` (passed to
        ``model_select``).
    :param times: number of folds when a new split has to be created.
    :param isFile: accepted but not used by this method.
    :param foldername: directory holding the cache files.
    :return: blended predictions for the test data.
    """
    # Any test matrix works for sizing the outputs (Python 2/3 compatible
    # replacement for ``tests_x.values()[0]``).
    test_data_sample = next(iter(tests_x.values()))
    if not os.path.exists(foldername):
        os.makedirs(foldername)

    # NOTE(security): pickle.load can execute arbitrary code; the cache
    # directory must only contain files written by this method.
    kfold_file = foldername + "/kfold_index.pkl"
    if os.path.exists(kfold_file):
        with open(kfold_file, "rb") as handle:
            skf = pickle.load(handle)
    else:
        skf = KFold(n=len(train_y), n_folds=times, shuffle=True)
        with open(kfold_file, "wb") as handle:
            pickle.dump(skf, handle)
    # Materialise the folds once so they can be counted and reused for
    # every parameter set.
    folds = list(skf)

    blend_train = np.zeros((len(train_y), len(parameters)))
    blend_test = np.zeros((len(test_data_sample), len(parameters)))
    for j, parameter in enumerate(parameters):
        train_x = trains_x[parameter['data']]
        test_x = tests_x[parameter['data']]
        # One column per fold, so the mean below averages exactly the
        # per-fold test predictions.
        blend_test_tmp = np.zeros((len(test_data_sample), len(folds)))
        for i, (train_index, valid_index) in enumerate(folds):
            kfold_filepath = "./" + foldername + "/parameter_{}_kfold_{}.pkl".format(j, i)
            if os.path.exists(kfold_filepath):
                # Cache hit: reuse the stored predictions instead of asking
                # a freshly built (unfitted) model to predict.
                with open(kfold_filepath, "rb") as handle:
                    blend_train_prediction, blend_test_prediction = pickle.load(handle)
            else:
                clf = model_select(parameter['parameter'])
                train = train_x[train_index]
                train_valid_y = train_y[train_index]
                clf.fit(train, np.log1p(train_valid_y))
                blend_train_prediction = np.expm1(clf.predict(train))
                blend_test_prediction = np.expm1(clf.predict(test_x))
                with open(kfold_filepath, "wb") as handle:
                    pickle.dump((blend_train_prediction, blend_test_prediction), handle)
            # NOTE(review): the blending rows are the predictions for the
            # fold's *training* part (train_index), not for valid_index --
            # confirm this is intended.
            blend_train[train_index, j] = blend_train_prediction
            blend_test_tmp[:, i] = blend_test_prediction
        blend_test[:, j] = blend_test_tmp.mean(1)

    # Blending Model
    bclf = LassoCV(n_alphas=100, alphas=None, normalize=True, cv=5, fit_intercept=True, max_iter=10000, positive=True)
    bclf.fit(blend_train, train_y)
    y_test_predict = bclf.predict(blend_test)
    return y_test_predict
示例6: fit_Lasso
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def fit_Lasso(features_train, labels_train, features_pred):
    """Fit a default LassoCV and predict labels for ``features_pred``.

    Also prints the shape of the fitted model's ``mse_path_`` array.

    :param features_train: training feature matrix.
    :param labels_train: training targets.
    :param features_pred: feature matrix to predict for.
    :return: predictions for ``features_pred``.
    """
    model = LassoCV()
    model.fit(features_train, labels_train)
    mse = model.mse_path_
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("%s %s" % ("LASSO - Mean square error: ", mse.shape))
    # Test the model
    labels_pred = model.predict(features_pred)
    return labels_pred
示例7: make_model_and_predict
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def make_model_and_predict(train_file, test_file):
    """Train a LassoCV salary model and write predictions for the test file.

    Given the name of a training csv file and of a test csv file, builds
    title-word and group features, fits a LassoCV model on the training data
    and writes the predictions to a time-stamped csv file
    ("predict<datetime>.csv"). If the test file has a SalaryNormalized
    column, the RMS error is also written to "score<datetime>.txt".

    :param train_file: path of the training csv file.
    :param test_file: path of the csv file to predict for.
    :return: None; results are written to files in the working directory.
    """
    train = pd.read_csv(train_file)
    valid = pd.read_csv(test_file)

    number_of_word_features = 200
    title_words = count_words_in_column(train, "Title")
    # Build the stop-word set once instead of re-reading the list per word.
    english_stopwords = set(stopwords.words('english'))
    key_count_pairs = [(k, v) for (k, v) in title_words.items()
                       if k not in english_stopwords]
    # Most frequent words first.
    key_count_pairs.sort(key=lambda pair: -pair[1])
    for word, _count in key_count_pairs[:number_of_word_features]:
        add_appearance_count_feature(train, word, "Title")
        add_appearance_count_feature(valid, word, "Title")

    group_features = ["LocationNormalized", "Category", "Company", "SourceName"]
    for feature_name in group_features:
        continuize_feature(train, valid, feature_name, "SalaryNormalized")

    # NOTE(review): columns from position 12 onward are used as features --
    # presumably the ones added above; verify against the csv layout.
    feature_columns = train.columns[12:]
    feature = train[feature_columns]
    label = train.SalaryNormalized
    clf = LassoCV()
    clf.fit(feature, label)
    valid_salary_predict = clf.predict(valid[feature_columns])
    valid["SalaryNormalized_Predict"] = valid_salary_predict

    date_string = re.sub("[ :.]", "", str(datetime.datetime.now()))
    predict_filename = 'predict' + date_string + '.csv'
    score_filename = 'score' + date_string + '.txt'
    # Let pandas open the file itself so the right mode is used on both
    # Python 2 and Python 3.
    valid[["Id", "SalaryNormalized_Predict"]].to_csv(predict_filename, index=False,
                                                     header=False)

    # Computes the RMS error and writes the score to file
    if 'SalaryNormalized' in valid.columns:
        errors = (valid["SalaryNormalized"] - valid["SalaryNormalized_Predict"]).values
        score = math.sqrt(float((errors ** 2).sum()) / len(errors))
        # Text is written, so the file is opened in text mode.
        with open(score_filename, 'w') as score_file:
            score_file.write("Train: " + train_file + "\n")
            score_file.write("Test: " + test_file + "\n")
            score_file.write("Score: " + str(score) + "\n")
示例8: get_model_per_cluster
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def get_model_per_cluster(X, Y):
    """Fit one LassoCV model per cluster and print in-sample fit statistics.

    :param X: DataFrame of features with a ``cluster`` column; all columns
        (including ``cluster``) of the selected rows are used as features.
    :param Y: DataFrame with ``cluster`` and ``ALSFRS_slope`` columns.
    :return: dict mapping each cluster id to
        ``{"cluster_train_data_means": <column means>, "model": <LassoCV>}``.
    """
    model_per_cluster = {}
    for c in X.cluster.unique():
        X_cluster = X[X.cluster == c]
        Y_true = Y[Y.cluster == c].ALSFRS_slope
        regr = LassoCV(cv=5)
        regr.fit(X_cluster, Y_true)
        print('cluster: %d size: %s' % (c, Y_true.shape))

        # All statistics below are computed on the training rows themselves.
        Y_predict = regr.predict(X_cluster)
        print("\t RMS error (0 is perfect): %.2f" % np.sqrt(np.mean(
            (Y_predict - Y_true) ** 2)))

        residual_SS = ((Y_predict - Y_true) ** 2).sum()
        total_SS = ((Y_true - Y_true.mean()) ** 2).sum()
        # Equivalent to regr.score(X_cluster, Y_true)
        print('\t coefficient of determination R^2 = %.2f ' % (1.0 - residual_SS / total_SS))

        cov = ((Y_predict - Y_predict.mean()) * (Y_true - Y_true.mean())).sum()
        Y_predict_std = np.sqrt(((Y_predict - Y_predict.mean()) ** 2).sum())
        Y_true_std = np.sqrt(((Y_true - Y_true.mean()) ** 2).sum())
        # Equivalent to scipy.stats.pearsonr(Y_predict, Y_true)[0]
        print('\t pearson correlation r = %.2f ' % (cov / (Y_predict_std * Y_true_std)))

        print("%s %s" % ("3 sample predictions: ", Y_predict[:3]))
        model_per_cluster[c] = {"cluster_train_data_means": X_cluster.mean(), "model": regr}
    return model_per_cluster
示例9: lassoRegularization
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def lassoRegularization(X, Y):
    """Fit a 10-fold LassoCV over three alphas and print the result.

    :param X: data consisting of features (excluding class variable)
    :param Y: column vector consisting of class variable
    :return: None; the selected alpha and the RMSE of the fitted model on
        (X, Y) itself are printed.
    """
    tuningAlpha = [0.1, 0.01, 0.001]
    lasso = LassoCV(normalize=True, alphas=tuningAlpha, cv=10)
    lasso.fit(X, Y)
    # The RMSE reported below is measured on the data used for fitting.
    prediction = lasso.predict(X)
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("")
    print("LASSO REGULARIZATION")
    print("Best Alpha value for Lasso Regularization : " + str(lasso.alpha_))
    print("%s %s" % ('Best RMSE for corresponding Alpha =',
                     np.sqrt(mean_squared_error(Y, prediction))))
示例10: __init__
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
class LocalRegression:
    """This class implements "local" regression. Given a set of training data and a set of unknown data,
    iterate through each unknown spectrum, find the nearest training spectra, and generate a model.
    Each of these local models is optimized using built-in cross validation methods from scikit."""

    def __init__(self, params, n_neighbors = 250):
        """Initialize LocalRegression

        Arguments:
        params = Dict containing the keywords and parameters for the regression method to be used.

        Keyword arguments:
        n_neighbors = User-specified number of training spectra to use to generate the local regression model for each
                      unknown spectrum.
        """
        # For now, the only option is LASSO; params is a dict containing the
        # keywords and parameters for LassoCV.
        self.model = LassoCV(**params)
        self.neighbors = NearestNeighbors(n_neighbors=n_neighbors)

    def fit_predict(self, x_train, y_train, x_predict):
        """Use local regression to predict values for unknown data.

        Arguments:
        x_train = The training data spectra (indexable with an index array).
        y_train = The values of the quantity being predicted for the training data
        x_predict = The unknown spectra for which y needs to be predicted.

        Returns:
        (predictions, coeffs, intercepts) = three lists with one entry per
        row of x_predict: the predicted value and the coefficients and
        intercept of the local model that produced it.
        """
        self.neighbors.fit(x_train)
        predictions = []
        coeffs = []
        intercepts = []
        for i in range(x_predict.shape[0]):
            print('Predicting spectrum ' + str(i + 1))
            x_temp = np.array(x_predict[i])
            _, ind = self.neighbors.kneighbors([x_temp])
            # A single query was passed, so `ind` holds one row of neighbour
            # indices. Indexing with that row keeps x_train_local 2-D even
            # when the spectra have only one feature.
            neighbor_rows = ind[0]
            x_train_local = x_train[neighbor_rows]
            y_train_local = np.squeeze(y_train[neighbor_rows])
            # Cross-validation is whatever `params` configured on the
            # LassoCV instance; no separate splitter is set up here.
            self.model.fit(x_train_local, y_train_local)
            predictions.append(self.model.predict([x_temp])[0])
            coeffs.append(self.model.coef_)
            intercepts.append(self.model.intercept_)
        return predictions, coeffs, intercepts
示例11: lassocvclassifier
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
def lassocvclassifier(training_samples, eval_samples, vectorizer, do_grid_search=False):
    """Fit a LassoCV model, print 5-fold CV scores and predict the eval set.

    :param training_samples: pair ``(X_train, Y_train)``.
    :param eval_samples: pair ``(X_eval, Y_eval)``.
    :param vectorizer: accepted but not used in this function.
    :param do_grid_search: accepted but not used in this function.
    :return: None.
    """
    X_train, Y_train = training_samples
    X_eval, Y_eval = eval_samples
    # An SGDClassifier(loss='log', ...) is a possible drop-in alternative;
    # the probability-based steps below only run for such estimators.
    clf = LassoCV()
    clf.fit(X_train, Y_train)

    # LassoCV is a regressor and has no predict_proba, so log-loss scoring
    # and probability output are only usable when the estimator offers it.
    has_proba = hasattr(clf, 'predict_proba')
    scoring = 'log_loss' if has_proba else 'mean_squared_error'
    scores = cross_validation.cross_val_score(clf, X_train, Y_train, cv=5, n_jobs=5, scoring=scoring)
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("%s %s %s" % (scores, np.mean(scores), np.median(scores)))
    print(clf)

    # NOTE(review): these values are computed but not used or returned in
    # the visible code -- confirm what the caller expects.
    y_true, y_pred = Y_eval, clf.predict(X_eval)
    y_prob = clf.predict_proba(X_eval) if has_proba else None
示例12: MovieTrainer
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
class MovieTrainer(object):
    """Load pickled movie records and derive frequency-based feature lists."""

    def __init__(self, training_file, test_file):
        """Store the pickle paths; all derived state starts out as None.

        :param training_file: path of the pickled training records.
        :param test_file: path of the pickled test records.
        """
        self._training_pickle = training_file
        self._test_pickle = test_file
        # to be defined later
        self._list_of_dicts = None
        self._dataframe = None
        self._features = None
        self._test_features = None
        self._labels = None
        self._clf = None
        self._training_frame = None
        self._test_frame = None
        self._prediction_frame = None
        # dicts
        self._actor_dict = None
        self._director_dict = None
        self._genre_dict = None
        self._production_house = None

    @staticmethod
    def _read_pickle(path):
        """Return the object stored in the pickle file at *path*.

        :raises AttributeError: if *path* is not an existing file.
        """
        if not os.path.isfile(path):
            raise AttributeError("Cannot find pickle file:%s" % path)
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files from a trusted source.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def _load_dataframe(self):
        """Load both pickles into DataFrames and drop rows without a name.

        Sets ``_training_dict``/``_test_dict`` and ``_training_frame``/
        ``_test_frame``.

        :raises AttributeError: if either pickle file does not exist.
        """
        self._training_dict = self._read_pickle(self._training_pickle)
        self._test_dict = self._read_pickle(self._test_pickle)
        # load pandas frame
        self._training_frame = pd.DataFrame(self._training_dict)
        self._test_frame = pd.DataFrame(self._test_dict)
        # drop movies with no names; dropna returns a new frame, so the
        # result has to be stored back
        self._training_frame = self._training_frame.dropna(subset=["moviename"])
        self._test_frame = self._test_frame.dropna(subset=["moviename"])
        return

    def _addtodict(self, name, this_dict):
        """Increment the count stored for *name* in *this_dict* (in place)."""
        this_dict[name] = this_dict.get(name, 0) + 1
        return

    def _modify_string(self, playername):
        """Normalise a name: trim it, join words with '_' and drop '*'."""
        playername = re.sub(r'^\s+|\s+$', '', playername)
        playername = re.sub(r'\s+', '_', playername)
        playername = re.sub(r'\*', '', playername)
        return playername

    def _create_playerdict(self, frame, colname, num_features):
        """Return the most frequent players of a column, most frequent first.

        :param frame: DataFrame holding the column.
        :param colname: column to count; for ``"actors"`` each cell is a
            list of names (like ['Sandra Bullock', 'Melissa McCarthy']),
            for any other column each cell is a single name.
        :param num_features: maximum number of names to return.
        :return: list of at most ``num_features`` normalised names.
        """
        playerdict = {}
        for playerlist in frame[colname]:
            # float cells (e.g. NaN for a missing value) carry no names
            if isinstance(playerlist, float):
                continue
            if colname == "actors":
                # only actors have multiple list members, other players
                # like director don't
                for playername in playerlist:
                    # remove spaces, *, leading trailing spaces
                    self._addtodict(self._modify_string(playername), playerdict)
            else:
                self._addtodict(self._modify_string(playerlist), playerdict)
        # sort the dict to get players with highest number of movies
        ranked = sorted(playerdict.items(), key=lambda x: x[1], reverse=True)
        return [name for name, _count in ranked[:max(num_features, 0)]]
#.........这里部分代码省略.........
示例13: LassoCV
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
test_data["casual_log"], feature_engg_linreg_model.predict(test_data.drop(target, axis=1))
)
# Not much difference? > Doesn't look like we are overfitting!
# But how to perform shrinkage/penalized regression in general?
from sklearn.linear_model import LassoCV

# Cross-validated LASSO on the same features and target as the linear model.
feature_engg_lassocv_model = LassoCV(max_iter=50, cv=3, n_jobs=-1, random_state=42)
feature_engg_lassocv_model.fit(train_data.drop(target, axis=1), train_data["casual_log"])
feature_engg_lassocv_mse_train = metrics.mean_squared_error(
    train_data["casual_log"], feature_engg_lassocv_model.predict(train_data.drop(target, axis=1))
)
feature_engg_lassocv_mse_test = metrics.mean_squared_error(
    test_data["casual_log"], feature_engg_lassocv_model.predict(test_data.drop(target, axis=1))
)

# Check the performance on test set
# (single-argument print() works on both Python 2 and Python 3)
print(feature_engg_linreg_mse_test)
print(feature_engg_lassocv_mse_test)

# Penalization decreases performance?
# Compare coefficients with non penalized model
print(feature_engg_linreg_model.coef_[1:10])
print(feature_engg_lassocv_model.coef_[1:10])
示例14: train
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
# split the data into train (0.75) and test (0.25)
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases
    from sklearn.cross_validation import train_test_split
train, test = train_test_split(movie_df, test_size = 0.25)

# fit lasso CV model
# predictor_list is the list of variables that we want to use for modelling.
# Here it is every column name except the responses and the movie title; you
# can create your own directly from the name list.
predictor_list = list(train.columns.values)
predictor_list.remove('domestic_gross')
predictor_list.remove('tomatoRating')
predictor_list.remove('new_title')

# fit the lasso model, which does selection and estimation in one step.
# If you want to see which variables are used, there is an easy way: just
# look at the coefficients -- a non-zero coefficient means that the variable
# is used for prediction.
from sklearn.linear_model import LassoCV
clf = LassoCV(cv=20).fit(train[predictor_list], train.domestic_gross)
Y_pred = clf.predict(test[predictor_list])
print(clf.coef_)

# calculate the mean_squared_error (arguments in y_true, y_pred order)
from sklearn.metrics import mean_squared_error
print(mean_squared_error(test.domestic_gross, Y_pred))
示例15: print
# 需要导入模块: from sklearn.linear_model import LassoCV [as 别名]
# 或者: from sklearn.linear_model.LassoCV import predict [as 别名]
# 'hot',
# 'frigid',
# 'all_high_snow',
# 'all_high_precip',
'cold'
]
# Build the value matrices for the training, test and full frames. Column 0
# is used as the regression target below, the remaining columns as features.
X_train = df_train[columns_list]
train_data = X_train.values
X_test = df_test[columns_list]
test_data = X_test.values
X_total = store_df[columns_list]
total_data = X_total.values

regr = regr.fit(train_data[:, 1:], train_data[:, 0])
#print(regr.alpha_,store,item)

# Predict for the test rows and for all rows; predictions are floored at 0.
prediction = np.maximum(regr.predict(test_data[:, 1:]), 0.)
prediction_total = np.maximum(regr.predict(total_data[:, 1:]), 0.)
total_series = pd.Series(prediction_total, index=unique_dates_int)

# Test-set error: root mean squared error and summed squared error.
rmse = np.sqrt(((test_data[:, 0] - prediction) ** 2).mean())
se = ((test_data[:, 0] - prediction) ** 2).sum()
# print(rmse,store,item)

# Add to the running totals, which are initialised outside this fragment.
rmse_total = rmse_total + rmse
se_total = se_total + se
# plt.scatter(df_test.index,test_data[0::,0] - prediction)
# plt.xlabel('date')
# plt.xlim(0,1050)
# plt.ylabel('truth - pred')