本文整理汇总了 Python 中 sklearn.linear_model.LassoCV 类的典型用法代码示例。如果您正苦于以下问题:Python LassoCV 类的具体用法?Python LassoCV 怎么用?Python LassoCV 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了LassoCV类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: lasso_cv
def lasso_cv(x, y, x_pred=None, max_deg=3, cv=10, max_iter=1e3, return_model=False):
    """Fit a LASSO-regularised polynomial to (x, y) using cross-validation.

    Regularized polynomial regression (penalized least squares) over
    polynomial terms up to degree ``max_deg``. The L1 penalty tends to drive
    coefficients to zero, giving a sparser fitted model; the penalty strength
    ``alpha`` is chosen by ``LassoCV`` through cross-validation.

    Args:
        x: 1-D array of abscissa values (may contain NaN).
        y: 1-D array of observations (may contain NaN).
        x_pred: values at which to evaluate the fit. Defaults to ``x``.
        max_deg: highest polynomial column taken from the design matrix.
        cv: forwarded to ``LassoCV`` as its ``cv`` argument.
        max_iter: iteration cap for ``LassoCV``; converted to ``int`` before
            use because the default is written as the float ``1e3``.
        return_model: when true, return ``[y_pred, lasso]`` instead of only
            the predictions.

    Returns:
        The predicted values, or the list ``[y_pred, fitted_model]`` when
        ``return_model`` is true.
    """
    # Only pairs where both x and y are finite take part in the fit.
    ind, = np.where((~np.isnan(x)) & (~np.isnan(y)))
    x_, y_ = x[ind], y[ind]

    # NOTE: the formula strings below name the local variables `x_`, `x` and
    # `x_pred`, so those locals must keep exactly these names.
    X_ = dmatrix('C(x_, Poly)')
    if x_pred is None:
        X = dmatrix('C(x, Poly)')       # predict on original values
    else:
        X = dmatrix('C(x_pred, Poly)')  # predict on given values

    # LassoCV expects an integer iteration count.
    lasso = LassoCV(cv=cv, copy_X=True, normalize=True, max_iter=int(max_iter))

    # Column 0 is skipped (presumably patsy's intercept term -- confirm);
    # columns 1..max_deg are used as regressors.
    lasso = lasso.fit(X_[:, 1:max_deg + 1], y_)
    y_pred = lasso.predict(X[:, 1:max_deg + 1])

    if return_model:
        y_pred = [y_pred, lasso]
    return y_pred
示例2: lassoCV_regression
def lassoCV_regression(data, target, alphas):
    """Select at most two features with LassoCV and report 10-fold RMSEs.

    A ``SelectFromModel`` wrapper around ``LassoCV`` is fitted, and its
    threshold is raised in steps of 0.1 until no more than two features
    survive. ``LassoCV`` is then refitted on each of 10 shuffled folds of the
    reduced data, the per-fold RMSE is collected and plotted.

    Args:
        data: feature matrix, indexable by integer index arrays.
        target: response vector, indexable by integer index arrays.
        alphas: accepted for interface compatibility; not used here.

    Returns:
        List with one RMSE value per fold.
    """
    clf = LassoCV()
    sfm = SelectFromModel(clf, threshold=0.25)
    sfm.fit(data, target)

    # Transform once up front so `data_transform` is defined even when the
    # initial threshold already leaves two or fewer features (previously this
    # case raised NameError further down).
    data_transform = sfm.transform(data)
    n_features = data_transform.shape[1]
    while n_features > 2:
        sfm.threshold += 0.1
        data_transform = sfm.transform(data)
        n_features = data_transform.shape[1]

    rmses = []
    kf = KFold(len(target), 10, True, None)
    for train_index, test_index in kf:
        data_train, data_test = data_transform[train_index], data_transform[test_index]
        target_train, target_test = target[train_index], target[test_index]
        clf.fit(data_train, target_train)
        rmse = sqrt(np.mean((clf.predict(data_test) - target_test) ** 2))
        rmses.append(rmse)

    # One x position per fold (10 folds -> 1..10).
    x0 = np.arange(1, 11)
    plt.figure()
    plt.plot(x0, rmses, label='LassoCV')
    plt.legend()
    plt.show()
    return rmses
示例3: predict
def predict(self,trains_x,train_y,tests_x,parameters,times=10,isFile=True,foldername="blend-dir"):
    """Blend several base models with a LassoCV meta-model.

    For every entry of ``parameters`` a model is built with ``model_select``
    and trained per fold on ``log1p`` of the target; its ``expm1``-transformed
    predictions fill one column of the blending matrices. Fold indices and
    per-fold predictions are cached as pickle files inside ``foldername`` and
    reused on later calls.

    :params trains_x: dictionary of training matrices, keyed by parameter['data']
    :params train_y: training target vector
    :params tests_x: dictionary of test matrices with the same keys
    :params parameters: sequence of dicts with the keys 'data' and 'parameter'
    :params times: number of folds used when no cached fold file exists
    :params isFile: accepted for interface compatibility; not used here
    :params foldername: directory holding the cache files
    :returns: blended predictions for the test data
    """
    # Any one test matrix gives the number of test rows.
    test_data_sample = next(iter(tests_x.values()))

    if not os.path.exists(foldername):
        os.makedirs(foldername)

    # SECURITY NOTE: cache files are read with pickle; only point
    # `foldername` at directories whose contents you trust.
    kfold_file = foldername + "/kfold_index.pkl"
    if os.path.exists(kfold_file):
        with open(kfold_file, "rb") as fh:
            skf = pickle.load(fh)
    else:
        skf = KFold(n=len(train_y), n_folds=times, shuffle=True)
        with open(kfold_file, "wb") as fh:
            pickle.dump(skf, fh)

    # Materialise the folds once: the fold count must come from the splitter
    # actually in use (a cached one may differ from `times`).
    folds = list(skf)

    blend_train = np.zeros((len(train_y), len(parameters)))
    blend_test = np.zeros((len(test_data_sample), len(parameters)))

    for j, parameter in enumerate(parameters):
        train_x = trains_x[parameter['data']]
        test_x = tests_x[parameter['data']]
        # One column per fold (was sized by len(parameters), which broke the
        # indexing and the mean whenever the two counts differed).
        blend_test_tmp = np.zeros((len(test_data_sample), len(folds)))

        for i, (train_index, _valid_index) in enumerate(folds):
            kfold_filepath = "./" + foldername + "/parameter_{}_kfold_{}.pkl".format(j,i)
            if os.path.exists(kfold_filepath):
                # Reuse the cached predictions; the original code loaded them
                # and then called predict() on a model that was never fitted.
                with open(kfold_filepath, "rb") as fh:
                    blend_train_prediction, blend_test_prediction = pickle.load(fh)
            else:
                clf = model_select(parameter['parameter'])
                train = train_x[train_index]
                train_valid_y = train_y[train_index]
                clf.fit(train, np.log1p(train_valid_y))
                blend_train_prediction = np.expm1(clf.predict(train))
                blend_test_prediction = np.expm1(clf.predict(test_x))
                with open(kfold_filepath, "wb") as fh:
                    pickle.dump((blend_train_prediction, blend_test_prediction), fh)

            # NOTE(review): these are in-sample predictions (made on the rows
            # the model was trained on) and later folds overwrite earlier
            # ones; confirm whether out-of-fold predictions were intended.
            blend_train[train_index, j] = blend_train_prediction
            blend_test_tmp[:, i] = blend_test_prediction

        # Average the per-fold test predictions for this base model.
        blend_test[:, j] = blend_test_tmp.mean(1)

    # Blending model
    bclf = LassoCV(n_alphas=100, alphas=None, normalize=True, cv=5, fit_intercept=True, max_iter=10000, positive=True)
    bclf.fit(blend_train, train_y)
    y_test_predict = bclf.predict(blend_test)
    return y_test_predict
示例4: bagging
def bagging(self,trains,tests,train_y,model_name=None):
    """Combine base-model outputs with a positive-coefficient LassoCV.

    ``trains`` and ``tests`` are transposed before use, so each of their rows
    becomes one column of the blending matrix.

    :params trains: base-model outputs for the training samples
    :params tests: base-model outputs for the test samples
    :params train_y: training target vector
    :params model_name: accepted but not used here
    :returns: tuple (predictions on the training data, predictions on the test data)
    """
    blender = LassoCV(
        n_alphas=100,
        alphas=None,
        normalize=True,
        cv=5,
        fit_intercept=True,
        max_iter=10000,
        positive=True,
    )
    blender.fit(trains.T, train_y)
    test_output = blender.predict(tests.T)
    train_output = blender.predict(trains.T)
    return train_output, test_output
示例5: fit_Lasso
def fit_Lasso(features_train, labels_train, features_pred):
    """Fit a default LassoCV model and predict labels for new features.

    Args:
        features_train: training feature matrix.
        labels_train: training targets.
        features_pred: feature matrix to predict on.

    Returns:
        Predictions of the fitted model for ``features_pred``.
    """
    model = LassoCV()
    model.fit(features_train, labels_train)
    mse = model.mse_path_
    # Single-argument print works on Python 2 and 3 and emits the same text
    # as the former print statement. Note that what is reported is the
    # *shape* of the MSE path, not the error values themselves.
    print(" ".join(["LASSO - Mean square error: ", str(mse.shape)]))
    # Test the model
    labels_pred = model.predict(features_pred)
    return labels_pred
示例6: lassocv_feature_select
def lassocv_feature_select(df):
    """Feature selection via LassoCV.

    Uses the ``status`` column of ``df`` as the target and every other column
    as a feature, fits ``LassoCV`` over a fixed list of alphas and prints the
    coefficients in descending order. Nothing is returned.
    """
    target = df['status']
    features = df.drop(['status'], axis=1)

    selector = LassoCV(alphas=[0.1, 1, 0.001, 0.0005])
    selector.fit(features, target)

    weights = pd.Series(selector.coef_, index=features.columns)
    ranked = weights.sort_values(ascending=False)
    print(ranked)
示例7: make_model_and_predict
def make_model_and_predict(train_file, test_file):
    """Train a LassoCV salary model and write predictions to a CSV file.

    Given the name of a training csv file and of a test csv file, adds
    word-count features for the most frequent non-stop-word title words,
    converts the grouping columns with ``continuize_feature``, fits a
    ``LassoCV`` model and writes the predictions to a time-stamped csv file
    ``predict<datetime>.csv``.

    If the test file has a ``SalaryNormalized`` column, the RMS error of the
    predictions is also written to ``score<datetime>.txt``.
    """
    train = pd.read_csv(train_file)
    valid = pd.read_csv(test_file)

    number_of_word_features = 200
    title_words = count_words_in_column(train, "Title")

    # Build the stop-word set once instead of calling stopwords.words() for
    # every candidate word.
    english_stopwords = set(stopwords.words('english'))
    key_count_pairs = [(k, v) for (k, v) in title_words.items()
                       if k not in english_stopwords]
    # Most frequent first. (Tuple-parameter lambdas are Python 2 only.)
    key_count_pairs.sort(key=lambda pair: -pair[1])

    for word, _count in key_count_pairs[:number_of_word_features]:
        add_appearance_count_feature(train, word, "Title")
        add_appearance_count_feature(valid, word, "Title")

    group_features = ["LocationNormalized", "Category", "Company", "SourceName"]
    for group_feature in group_features:
        continuize_feature(train, valid, group_feature, "SalaryNormalized")

    # Everything from column 12 onwards is used as a model feature
    # (presumably the columns added above -- confirm against the input schema).
    feature_columns = train.columns[12:]
    feature = train[feature_columns]
    label = train.SalaryNormalized

    clf = LassoCV()
    clf.fit(feature, label)
    valid_salary_predict = clf.predict(valid[feature_columns])
    valid["SalaryNormalized_Predict"] = valid_salary_predict

    date_string = re.sub("[ :.]", "", str(datetime.datetime.now()))
    predict_filename = 'predict' + date_string + '.csv'
    score_filename = 'score' + date_string + '.txt'

    # Let pandas open the file itself; a handle opened in 'wb' mode cannot
    # take text on Python 3.
    valid[["Id", "SalaryNormalized_Predict"]].to_csv(
        predict_filename, index=False, header=False)

    ## Computes average RMS error and writes score to file
    if 'SalaryNormalized' in valid.columns:
        squared_error = sum(
            (actual - predicted) ** 2
            for actual, predicted in zip(valid["SalaryNormalized"],
                                         valid["SalaryNormalized_Predict"]))
        score = math.sqrt(squared_error / len(valid["SalaryNormalized_Predict"]))
        with open(score_filename, 'w') as score_file:
            score_file.write("Train: " + train_file + "\n")
            score_file.write("Test: " + test_file + "\n")
            score_file.write("Score: " + str(score) + "\n")
示例8: lassoRegularization
def lassoRegularization(X,Y):
    """Fit LassoCV over a fixed alpha grid and print the selected alpha and RMSE.

    :param X: data consisting of features (excluding class variable)
    :param Y: column vector consisting of class variable
    :return: None. The chosen alpha and the RMSE are printed; the RMSE is
        computed on the same data the model was fitted on.
    """
    tuningAlpha = [0.1, 0.01, 0.001]
    lasso = LassoCV(normalize=True, alphas=tuningAlpha, cv=10)
    lasso.fit(X, Y)
    prediction = lasso.predict(X)
    rmse = np.sqrt(mean_squared_error(Y, prediction))

    # Single-argument print calls behave identically on Python 2 and 3 and
    # reproduce the output of the former print statements.
    print("")
    print("LASSO REGULARIZATION")
    print("Best Alpha value for Lasso Regularization : " + str(lasso.alpha_))
    print(" ".join(['Best RMSE for corresponding Alpha =', str(rmse)]))
示例9: __init__
class Trainer:
    """Cross-validated trainer whose model is chosen by ``config.model``.

    ``'SVM'`` selects a linear ``SVC`` wrapped in ``GridSearchCV`` over ``C``;
    ``'Regression'`` selects ``LassoCV``. For any other value ``clf`` stays
    ``None``.
    """

    # Cross-validated model used for fitting (set in __init__).
    clf = None
    # Underlying SVC instance; only set for the 'SVM' model.
    svm = None

    def __init__(self):
        """Build the estimator selected by ``config.model``."""
        # Compare strings by value: `is` tests object identity and only
        # appeared to work because of string interning.
        if config.model == 'SVM':
            self.svm = svm.SVC(kernel='linear', shrinking=True, verbose=False)
            params = {
                'C': np.logspace(-5, -1, num=20),  # Range of C values
            }
            self.clf = GridSearchCV(self.svm, params,
                cv = 5,                # k-fold CV
                n_jobs = cpu_count(),  # Parallelize over CPUs
                verbose = 1,
            )
        elif config.model == 'Regression':
            self.clf = LassoCV(
                cv = 3,
                max_iter = 2000,
                n_jobs = cpu_count(),
                verbose = True,
            )

    def train(self, featMat, persist=True):
        """Standardise ``featMat.X`` and fit the cross-validated model.

        ``featMat.X`` is replaced by its scaled version. The scaler is kept on
        ``self.scaler`` and the fitted estimator on ``self.estimator``. When
        ``persist`` is true the scaler is dumped to ``'preprocess.out'`` and
        the fitted model to ``'cv.out'``.
        """
        # Preprocess
        scaler = StandardScaler()
        featMat.X = scaler.fit_transform(featMat.X, featMat.y)

        # Save preprocess output
        self.scaler = scaler
        if persist:
            joblib.dump(scaler, 'preprocess.out')

        # Perform CV
        print('Running trainer on %d rows of data with %d features.' % featMat.X.shape)
        self.clf.fit(featMat.X, featMat.y)

        # Save CV output
        if config.model == 'SVM':
            # The grid search keeps the winning SVC separately.
            self.estimator = self.clf.best_estimator_
        elif config.model == 'Regression':
            self.estimator = self.clf
        print(self.estimator)
        if persist:
            joblib.dump(self.clf, 'cv.out')
示例10: __init__
class LocalRegression:
    """This class implements "local" regression. Given a set of training data and a set of unknown data,
    iterate through each unknown spectrum, find the nearest training spectra, and generate a model.
    Each of these local models is optimized using built-in cross validation methods from scikit."""

    def __init__(self, params, n_neighbors = 250):
        """Initialize LocalRegression

        Arguments:
        params = Dict containing the keywords and parameters for the regression method to be used.

        Keyword arguments:
        n_neighbors = User-specified number of training spectra to use to generate the local regression model for each
                      unknown spectrum.
        """
        # For now, the only option is LASSO. Other methods to be added in the future.
        # params is a dict containing the keywords and parameters for LassoCV.
        self.model = LassoCV(**params)
        self.neighbors = NearestNeighbors(n_neighbors=n_neighbors)

    def fit_predict(self,x_train,y_train, x_predict):
        """Use local regression to predict values for unknown data.

        Arguments:
        x_train = The training data spectra.
        y_train = The values of the quantity being predicted for the training data
        x_predict = The unknown spectra for which y needs to be predicted.

        Returns three lists with one entry per row of x_predict: the
        predictions, the coefficient vectors and the intercepts of the local
        models.
        """
        self.neighbors.fit(x_train)
        predictions = []
        coeffs = []
        intercepts = []
        for i in range(x_predict.shape[0]):
            print('Predicting spectrum ' + str(i + 1))
            x_temp = np.array(x_predict[i])
            # Only the neighbour indices are needed; distances are discarded.
            _, ind = self.neighbors.kneighbors([x_temp])
            x_train_local = np.squeeze(x_train[ind])
            y_train_local = np.squeeze(y_train[ind])
            # NOTE(review): a GroupKFold(n_splits=3) splitter used to be built
            # here but was never handed to the model, so it had no effect and
            # was removed. Cross-validation is governed solely by `params`.
            self.model.fit(x_train_local, y_train_local)
            predictions.append(self.model.predict([x_temp])[0])
            coeffs.append(self.model.coef_)
            intercepts.append(self.model.intercept_)
        return predictions, coeffs, intercepts
示例11: lassocvclassifier
def lassocvclassifier(training_samples, eval_samples, vectorizer, do_grid_search=False):
    """Fit LassoCV on the training samples and evaluate it.

    Args:
        training_samples: pair ``(X_train, Y_train)``.
        eval_samples: pair ``(X_eval, Y_eval)``.
        vectorizer: accepted but not used in the visible body.
        do_grid_search: accepted but not used in the visible body.

    Nothing is returned; cross-validation scores and the fitted model are
    printed.

    NOTE(review): LassoCV is a regressor. Both the 'log_loss' scorer and the
    ``predict_proba`` call below expect a probabilistic classifier, so this
    function is expected to fail as written -- confirm the intended model.
    """
    X_train, Y_train = training_samples
    X_eval, Y_eval = eval_samples

    clf = LassoCV()
    clf.fit(X_train, Y_train)

    print_top_10_words = True
    scores = cross_validation.cross_val_score(clf, X_train, Y_train, cv=5, n_jobs=5, scoring='log_loss')
    # Single-argument print: same output as the former Python 2 print
    # statement, and valid on Python 3.
    print(" ".join(str(value) for value in (scores, np.mean(scores), np.median(scores))))
    print(clf)

    y_true, y_pred = Y_eval, clf.predict(X_eval)
    y_prob = clf.predict_proba(X_eval)
示例12: _regression
def _regression( self, i_start, i_end ):
    """
    Fit a 10-fold cross-validated Lasso model (with intercept).

    The regression data are obtained from
    ``self._AssembleRegressionData_i(i_start, i_end)``. Returns a dict whose
    only key, "reg_result", holds the fitted LassoCV object.
    """
    features, response = self._AssembleRegressionData_i(i_start, i_end)

    model = LassoCV(cv=10, fit_intercept=True)
    model.fit(features, response)

    # TODO: add the regression coefficients to the result in the future.
    # Extracting them from LassoCV does not quite work yet; this needs to be
    # updated so the coefficients are available for predict.
    return {"reg_result": model}
示例13: remove_foreground_glm
def remove_foreground_glm(
        x, y,
        spatial_mask=None, spectral_mask=None,
        alphas=None, l1_ratio=1.):
    """Fit a regularised linear model of ``y`` on the components of ``x``.

    ``x`` is reshaped to ``(x.shape[0], -1)`` and transposed, so each entry
    along its first axis becomes one regressor column; ``y`` is flattened to
    match the rows.

    Args:
        x: regressor array; all axes after the first are flattened.
        y: target array; flattened for the fit.
        spatial_mask (optional): boolean mask, flattened, selecting the rows
            used for fitting. Defaults to all rows.
        spectral_mask (optional): boolean mask selecting the regressor
            columns used. Defaults to all columns.
        alphas (optional): regularisation strength(s) forwarded to the
            estimator. When None, each estimator uses its own default.
        l1_ratio: ``1.`` selects ``LassoCV``, ``0.`` selects ``RidgeCV``, any
            other value selects ``ElasticNetCV`` with that ratio. The Lasso
            and elastic-net variants constrain coefficients to be positive.

    Returns:
        Tuple ``(y_model, reg, glm_coeffs)``: the model evaluated on all rows
        and reshaped to ``y.shape``, the fitted estimator, and a float32
        vector with one coefficient per regressor (zero for columns excluded
        by ``spectral_mask``).
    """
    # cast to double and reshape
    x_rs = np.float64(x.reshape((x.shape[0], -1))).T
    y_rs = np.float64(y.flatten())

    if spatial_mask is None:
        spatial_mask_rs = np.ones_like(y_rs, dtype=bool)
    else:
        spatial_mask_rs = spatial_mask.flatten()
    if spectral_mask is None:
        spectral_mask = np.ones(x_rs.shape[1], dtype=bool)
    if alphas is not None:
        alphas = np.atleast_1d(alphas)

    # fit GLM
    if l1_ratio == 1.:
        reg = LassoCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            max_iter=5000
        )
    elif l1_ratio == 0.:
        # RidgeCV does not accept alphas=None; fall back to its own default
        # grid when the caller did not supply any.
        ridge_kwargs = {} if alphas is None else {'alphas': alphas}
        reg = RidgeCV(**ridge_kwargs)
    else:
        reg = ElasticNetCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            l1_ratio=l1_ratio
        )

    # Fit on the selected rows only, but evaluate the model everywhere.
    reg.fit(x_rs[spatial_mask_rs][:, spectral_mask], y_rs[spatial_mask_rs])
    y_model = reg.predict(x_rs[:, spectral_mask]).reshape(y.shape)

    glm_coeffs = np.zeros(x_rs.shape[1], dtype=np.float32)
    glm_coeffs[spectral_mask] += reg.coef_
    return y_model, reg, glm_coeffs
示例14: get_model_per_cluster
def get_model_per_cluster(X, Y):
    """Fit one LassoCV model per cluster and print in-sample fit statistics.

    Args:
        X: DataFrame with a ``cluster`` column; all of its columns are passed
            to the model as features.
        Y: DataFrame with ``cluster`` and ``ALSFRS_slope`` columns.

    Returns:
        Dict mapping each cluster id to
        ``{"cluster_train_data_means": <column means of that cluster's X>,
        "model": <fitted LassoCV>}``.
    """
    model_per_cluster = {}
    for c in X.cluster.unique():
        X_cluster = X[X.cluster == c]
        Y_true = Y[Y.cluster == c].ALSFRS_slope

        regr = LassoCV(cv=5)
        regr.fit(X_cluster, Y_true)

        # Single-argument print calls run on both Python 2 and 3 and produce
        # the same text as the former print statements.
        print('cluster: %d size: %s' % (c, Y_true.shape))

        # Predict once and reuse the result for every statistic below.
        Y_predict = regr.predict(X_cluster)
        residuals = Y_predict - Y_true
        print("\t RMS error (0 is perfect): %.2f" % np.sqrt(np.mean(residuals ** 2)))

        # R^2 = 1 - SS_residual / SS_total
        residual_SS = (residuals ** 2).sum()
        total_SS = ((Y_true - Y_true.mean()) ** 2).sum()
        print('\t coefficient of determination R^2 = %.2f ' % (1.0 - residual_SS / total_SS))

        # Pearson r from the centred predictions and targets.
        predict_dev = Y_predict - Y_predict.mean()
        true_dev = Y_true - Y_true.mean()
        cov = (predict_dev * true_dev).sum()
        Y_predict_std = np.sqrt((predict_dev ** 2).sum())
        Y_true_std = np.sqrt((true_dev ** 2).sum())
        print('\t pearson correlation r = %.2f ' % (cov / (Y_predict_std * Y_true_std)))

        print(" ".join(["3 sample predictions: ", str(Y_predict[:3])]))

        model_per_cluster[c] = {"cluster_train_data_means": X_cluster.mean(), "model" : regr}
    return model_per_cluster
示例15: lassocv_n_random_lasso
def lassocv_n_random_lasso(X, y, n_iter = 30, test_size = 0.2,
                           max_iter = 50000, n_resampling = 2000):
    """Rank features with RandomizedLasso at an alpha chosen by LassoCV.

    A ``LassoCV`` fit over ``ShuffleSplit`` resamples picks the penalty
    strength; ``RandomizedLasso`` is then run at that alpha and the features
    are ordered by descending score.

    Returns a tuple ``(rank, scores)`` where ``rank`` holds the feature
    indices from highest to lowest score and ``scores`` the matching values.
    """
    # Step 1: find a good alpha using cross-validation.
    splitter = ShuffleSplit(X.shape[0], n_iter, test_size)
    alpha_search = LassoCV(normalize=True, cv=splitter, max_iter=max_iter)
    alpha_search.fit(X, y)

    # Step 2: randomized lasso at the selected alpha.
    stability = RandomizedLasso(
        alpha=alpha_search.alpha_,
        n_resampling=n_resampling,
        max_iter=max_iter,
        normalize=True,
    )
    stability.fit(X, y)

    order = stability.scores_.argsort()[::-1]
    return (order, stability.scores_[order])