

Python linear_model.LassoCV Class Code Examples

This article collects typical usage examples of the Python class sklearn.linear_model.LassoCV. If you are wondering what the LassoCV class does, how to use it, or where to find working examples, the curated snippets below should help.


Below are 15 code examples of the LassoCV class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
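Before the project-specific snippets, here is a minimal, self-contained sketch (written for this article, not taken from any project below) of the typical LassoCV workflow: the regularization strength alpha is chosen by cross-validation during fit, and the fitted model is then used like any other scikit-learn regressor. The data here is synthetic.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split

# Synthetic regression data, split into train and test sets
X, y = make_regression(n_samples=200, n_features=20, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = LassoCV(cv=5)              # alpha is selected by 5-fold cross-validation
model.fit(X_train, y_train)

print("chosen alpha:", model.alpha_)
print("test R^2:", model.score(X_test, y_test))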

Example 1: lasso_cv

def lasso_cv(x, y, x_pred=None, max_deg=3, cv=10, max_iter=1e3, return_model=False):
    """LASSO polynomial fit with cross-validation.
    
    Regularized polynomial regression (by penalized least-squares) from a
    range of degrees up to n = max_deg. The LASSO regression minimises MSE and
    penalizes the size of the parameter vector using L1-norm, which leads to
    fewer coefficients in the fitted model.

    - The 'alpha' parameter (amount of penalization) is selected by k-fold CV.
    - Predicts fitted model on given values 'x_pred' (default use 'x').
    - Supports NaNs.

    """
    ind, = np.where((~np.isnan(x)) & (~np.isnan(y)))
    x_, y_ = x[ind], y[ind]
    X_ = dmatrix('C(x_, Poly)')
    if x_pred is None:
        X = dmatrix('C(x, Poly)')      # predict on original values
    else:
        X = dmatrix('C(x_pred, Poly)') # predict on given values
    lasso = LassoCV(cv=cv, copy_X=True, normalize=True, max_iter=max_iter)
    lasso = lasso.fit(X_[:,1:max_deg+1], y_)
    y_pred = lasso.predict(X[:,1:max_deg+1])
    if return_model:
        y_pred = [y_pred, lasso]
    return y_pred
Developer: fspaolo, Project: altimpy, Lines: 26, Source: tseries.py
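A hypothetical usage sketch for lasso_cv above (the data and variable names are invented for illustration; numpy is assumed to be imported as np, and patsy's dmatrix as in the original module):

# Fit a cubic LASSO trend to a noisy series that contains NaNs.
t = np.linspace(0, 10, 100)
y = 0.5 * t**2 - 2.0 * t + np.random.randn(100) * 3.0
y[::17] = np.nan                      # NaN entries are dropped internally by lasso_cv

y_fit, model = lasso_cv(t, y, max_deg=3, cv=10, return_model=True)
print("selected alpha:", model.alpha_)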

Example 2: lassoCV_regression

def lassoCV_regression(data,target,alphas):
    clf=LassoCV()
    sfm = SelectFromModel(clf, threshold=0.25)
    sfm.fit(data, target)
    data_transform = sfm.transform(data)
    n_features = data_transform.shape[1]

    # Raise the selection threshold until no more than two features remain
    while n_features > 2:
        sfm.threshold += 0.1
        data_transform = sfm.transform(data)
        n_features = data_transform.shape[1]
     
    rmses=[]
    kf=KFold(len(target),10,True,None)  # legacy sklearn.cross_validation KFold(n, n_folds, shuffle, random_state)
    for train_index, test_index in kf:
        data_train,data_test=data_transform[train_index],data_transform[test_index]
        target_train,target_test=target[train_index],target[test_index]
        clf.fit(data_train,target_train)
        rmse=sqrt(np.mean((clf.predict(data_test)-target_test)**2))
        rmses.append(rmse)
        
    x0=np.arange(1,11)
    
    plt.figure()
    plt.plot(x0,rmses,label='LassoCV')
    plt.legend()
    plt.show()
    
    return rmses
Developer: eprym, Project: EE-239AS, Lines: 28, Source: problem5.py

Example 3: predict

	def predict(self,trains_x,train_y,tests_x,parameters,times=10,isFile=True,foldername="blend-dir"):
		"""
		Ensamble many features and regression

		:params train_X: dictionary for training
		:params train_y: testing vector
		"""
		#parameter_get
		test_data_sample = tests_x.values()[0]

		if not os.path.exists(foldername):
			os.makedirs(foldername)

		skf = None
		kfold_file = foldername + "/kfold_index.pkl"
		if os.path.exists(kfold_file):
			skf = pickle.load(open(kfold_file,"r"))
		else:
			skf = KFold(n=len(train_y),n_folds=times,shuffle=True)
			pickle.dump(skf,open(kfold_file,"w"))

		blend_train = np.zeros((len(train_y),len(parameters)))
		blend_test = np.zeros((len(test_data_sample),len(parameters)))

		for j,parameter in enumerate(parameters):
			train_x = trains_x[parameter['data']]
			test_x = tests_x[parameter['data']]

			blend_test_tmp = np.zeros((len(test_data_sample),times))  # one column per fold

			#file path check
			for i, (train_index,valid_index) in enumerate(skf):
				clf = model_select(parameter['parameter'])

				train = train_x[train_index]
				train_valid_y = train_y[train_index]

				kfold_filepath = "./" + foldername + "/parameter_{}_kfold_{}.pkl".format(j,i)

				if os.path.exists(kfold_filepath):
					# Reuse the cached fold predictions; do not call predict on the unfitted clf here.
					blend_train_prediction,blend_test_prediction = pickle.load(open(kfold_filepath,"r"))
				else:
					clf.fit(train,np.log1p(train_valid_y))
					blend_train_prediction = np.expm1(clf.predict(train))
					blend_test_prediction = np.expm1(clf.predict(test_x))
					pickle.dump((blend_train_prediction,blend_test_prediction),open(kfold_filepath,"w"))

				blend_train[train_index,j] = blend_train_prediction
				blend_test_tmp[:,i] = blend_test_prediction
			blend_test[:,j] = blend_test_tmp.mean(1)

		#Blending Model
		bclf = LassoCV(n_alphas=100, alphas=None, normalize=True, cv=5, fit_intercept=True, max_iter=10000, positive=True)
		bclf.fit(blend_train, train_y)
		y_test_predict = bclf.predict(blend_test)

		return y_test_predict
Developer: tereka114, Project: MachineLearningCombinator, Lines: 59, Source: Ensembler.py

Example 4: bagging

	def bagging(self,trains,tests,train_y,model_name=None):
		blend_train = trains.T
		bclf = LassoCV(n_alphas=100, alphas=None, normalize=True, cv=5, fit_intercept=True, max_iter=10000, positive=True)
		bclf.fit(blend_train, train_y)
		y_test_predict = bclf.predict(tests.T)
		train_predict = bclf.predict(trains.T)

		return train_predict,y_test_predict
Developer: tereka114, Project: MachineLearningCombinator, Lines: 8, Source: Ensembler.py

Example 5: fit_Lasso

def fit_Lasso(features_train, labels_train, features_pred):
	model = LassoCV()
	model.fit(features_train, labels_train)
	mse = model.mse_path_
	print "LASSO - Mean square error: ", mse.shape
	# Test the model
	labels_pred = model.predict(features_pred)
	return labels_pred
Developer: SU-AstroML, Project: AstroML-course, Lines: 8, Source: fit_method.py
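The mse_path_ attribute printed above has shape (n_alphas, n_folds); averaging it over the folds shows which alpha the cross-validation actually picked. A self-contained sketch (not from the project) on synthetic data:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=100, n_features=10, noise=5.0, random_state=0)
model = LassoCV(cv=5).fit(X, y)

mean_mse = model.mse_path_.mean(axis=1)        # average MSE over the CV folds
print("alpha minimizing mean CV MSE:", model.alphas_[np.argmin(mean_mse)])
print("model.alpha_ (the same value):", model.alpha_)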

Example 6: lassocv_feature_select

def lassocv_feature_select(df):
    """
    通过LassoCV 进行特征选择
    """    
    X = df.drop(['status'],axis=1)
    y = df['status']
    model_lasso = LassoCV(alphas = [0.1,1,0.001, 0.0005])
    model_lasso.fit(X,y)
    coef = pd.Series(model_lasso.coef_,index=X.columns)
    print(coef.sort_values(ascending=False))
Developer: gdzsgcj, Project: mygit, Lines: 10, Source: feature_engineeing.py
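A follow-up sketch (written for this article, using an invented DataFrame that mimics the layout above: numeric feature columns plus a 'status' target) showing how the printed coefficient series can be turned into a feature subset: columns whose coefficients the L1 penalty drives to zero at the selected alpha can be dropped.

import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV

# Toy data: 'status' depends only on columns 'a' and 'c'
df = pd.DataFrame(np.random.randn(200, 5), columns=list('abcde'))
df['status'] = 2.0 * df['a'] - df['c'] + np.random.randn(200) * 0.1

X, y = df.drop(['status'], axis=1), df['status']
model_lasso = LassoCV(alphas=[0.1, 1, 0.001, 0.0005]).fit(X, y)

coef = pd.Series(model_lasso.coef_, index=X.columns)
print(coef.sort_values(ascending=False))
print("non-zero coefficients:", list(coef[coef != 0].index))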

Example 7: make_model_and_predict

def make_model_and_predict(train_file, test_file):
    """Given name of training csv file, name of test csv file, constructs
    a random forest model and outputs predictions to a time-stampled csv file.
    If the test_file has SalaryNormalized as an attribute, it will score the
    model and write the result in the file "score<datetime>"
    """

    train = pd.read_csv(train_file)
    valid = pd.read_csv(test_file)
    number_of_word_features = 200
    title_words = count_words_in_column(train, "Title")
    key_count_pairs = [(k,v) for (k,v) in title_words.items() if k not in
                                                stopwords.words('english')]

    key_count_pairs.sort(key=lambda kv: -kv[1])

    for word, count in key_count_pairs[:number_of_word_features]:
        add_appearance_count_feature(train, word, "Title")
        add_appearance_count_feature(valid, word, "Title")


    group_features = ["LocationNormalized", "Category", "Company", "SourceName"]

    for f in group_features:
        continuize_feature(train, valid, f, "SalaryNormalized")

    feature_columns = train.columns[12:]

    feature=train[feature_columns]
    label=train.SalaryNormalized
    clf = LassoCV()
    clf.fit(feature, label)

    valid_salary_predict = clf.predict(valid[feature_columns])
    valid["SalaryNormalized_Predict"] = valid_salary_predict

    date_string = re.sub("[ :.]", "", str(datetime.datetime.now()))
    predict_filename = 'predict' + date_string + '.csv'
    score_filename = 'score' + date_string + '.txt'
    with open(predict_filename,'w') as f:
        valid[["Id","SalaryNormalized_Predict"]].to_csv(f, index=False,
                                                    header=False)

    ##Computes average RMS error and writes score to file
    if hasattr(valid, 'SalaryNormalized'):
        score = 0
        for i,_ in enumerate(valid["SalaryNormalized_Predict"]):
            score += (valid.SalaryNormalized[i] -
                                valid.SalaryNormalized_Predict[i]) **2
        score = math.sqrt(score/len(valid["SalaryNormalized_Predict"]))
        with open(score_filename, 'w') as f:
            f.write("Train: " + train_file + "\n")
            f.write("Test: " + test_file + "\n")
            f.write("Score: " + str(score) + "\n")
Developer: kkwteh, Project: job_competition, Lines: 54, Source: kaggle_job_model.py

Example 8: lassoRegularization

def lassoRegularization(X,Y):
    """
    :param X: data consisting of features (excluding the target variable)
    :param Y: column vector containing the target variable
    :return: None; prints the alpha chosen by CV and the corresponding training RMSE
    """
    tuningAlpha = [0.1,0.01,0.001]
    lasso = LassoCV(normalize=True, alphas=tuningAlpha, cv=10)
    lasso.fit(X,Y)
    prediction = lasso.predict(X)

    print()
    print("LASSO REGULARIZATION")
    print("Best Alpha value for Lasso Regularization : " + str(lasso.alpha_))
    print('RMSE for the corresponding Alpha =', np.sqrt(mean_squared_error(Y, prediction)))
Developer: RonakSumbaly, Project: EE239AS-Signal-and-Systems, Lines: 15, Source: utility.py

Example 9: __init__

class Trainer:
    clf = None
    svm = None

    def __init__(self):
        if config.model == 'SVM':
            self.svm = svm.SVC(kernel='linear', shrinking=True, verbose=False)
            params = {
                'C': np.logspace(-5, -1, num=20), # Range of C values
            }
            self.clf = GridSearchCV(self.svm, params,
                cv      = 5,           # k-fold CV
                n_jobs  = cpu_count(), # Parallelize over CPUs
                verbose = 1,
            )

        elif config.model == 'Regression':
            self.clf = LassoCV(
                cv         = 3,
                max_iter   = 2000,
                n_jobs     = cpu_count(),
                verbose    = True,
            )

    def train(self, featMat, persist=True):
        # Preprocess
        scaler = StandardScaler()
        featMat.X = scaler.fit_transform(featMat.X, featMat.y)

        # Save preprocess output
        self.scaler = scaler
        if persist:
            joblib.dump(scaler, 'preprocess.out')

        # Perform CV
        print('Running trainer on %d rows of data with %d features.' % featMat.X.shape)
        self.clf.fit(featMat.X, featMat.y)

        # Save CV output
        if config.model == 'SVM':
            self.estimator = self.clf.best_estimator_
        elif config.model == 'Regression':
            self.estimator = self.clf
        print(self.estimator)

        if persist:
            joblib.dump(self.clf, 'cv.out')
Developer: caomw, Project: autocrop, Lines: 47, Source: Trainer.py

Example 10: __init__

class LocalRegression:
    """This class implements "local" regression. Given a set of training data and a set of unknown data,
           iterate through each unknown spectrum, find the nearest training spectra, and generate a model.
           Each of these local models is optimized using built-in cross validation methods from scikit."""
    def __init__(self, params, n_neighbors = 250):
        """Initialize LocalRegression

        Arguments:
        params = Dict containing the keywords and parameters for the regression method to be used.

        Keyword arguments:
        n_neighbors = User-specified number of training spectra to use to generate the local regression model for each
                      unknown spectrum.

        """
        self.model = LassoCV(**params) # For now, the only option is LASSO. Other methods to be added in the future
                                       # params is a dict containing the keywords and parameters for LassoCV

        self.neighbors = NearestNeighbors(n_neighbors=n_neighbors)

    def fit_predict(self,x_train,y_train, x_predict):
        """Use local regression to predict values for unknown data.

        Arguments:
            x_train = The training data spectra.
            y_train = The values of the quantity being predicted for the training data
            x_predict = The unknown spectra for which y needs to be predicted.
        """
        self.neighbors.fit(x_train)
        predictions = []
        coeffs = []
        intercepts = []
        for i in range(x_predict.shape[0]):
            print('Predicting spectrum ' + str(i + 1))
            x_temp = np.array(x_predict[i])
            foo, ind = self.neighbors.kneighbors([x_temp])
            x_train_local = np.squeeze(x_train[ind])
            y_train_local = np.squeeze(y_train[ind])

            # NOTE: this cv split is built but never passed to the model;
            # LassoCV runs its own internal cross-validation (configurable via params).
            cv = GroupKFold(n_splits=3)
            cv = cv.split(x_train_local, y_train_local,
                          groups=y_train_local)
            self.model.fit(x_train_local, y_train_local)
            predictions.append(self.model.predict([x_temp])[0])
            coeffs.append(self.model.coef_)
            intercepts.append(self.model.intercept_)
        return predictions, coeffs, intercepts
Developer: USGS-Astrogeology, Project: PySAT, Lines: 47, Source: local_regression.py

Example 11: lassocvclassifier

def lassocvclassifier(training_samples, eval_samples, vectorizer, do_grid_search=False):
    X_train, Y_train = training_samples
    X_eval, Y_eval = eval_samples
    #clf = SGDClassifier(loss='log', penalty= 'l2',l1_ratio=0.0, n_iter=30, shuffle=True, verbose=False, 
    #                    n_jobs=4, alpha=1e-4, average=True, class_weight=None)
    clf = LassoCV()
   
    clf.fit(X_train, Y_train)
    #y_train_true, y_train_pred = Y_train, clf.predict(X_train)
    print_top_10_words = True
    
    
    scores = cross_validation.cross_val_score(clf, X_train, Y_train, cv=5, n_jobs=5, scoring='log_loss')
    print(scores, np.mean(scores), np.median(scores))

    print(clf)
    #scores = cross_validation.cross_val_score(clf.best_estimator_, X_train, Y_train, cv=10, scoring='log_loss')
    #print scores, np.mean(scores), np.median(scores)
    y_true, y_pred = Y_eval, clf.predict(X_eval)
    y_prob = clf.predict_proba(X_eval)  # note: LassoCV is a regressor and has no predict_proba, so this call will fail
Developer: afshinrahimi, Project: telstra, Lines: 20, Source: models.py

Example 12: _regression

	def _regression( self, i_start, i_end ):
		"""
		Model of Lasso
		"""
		X, y = self._AssembleRegressionData_i( i_start, i_end )

		lasso = LassoCV( cv = 10 )
		lasso.fit_intercept = True
		lasso.fit( X, y )

		res = {
			"reg_result" : lasso,
			# TODO: also return "reg_coefficients" (list(lasso.coef_)) once
			# coefficient extraction for prediction is working.
		}

		return res
Developer: quietquanta, Project: quant, Lines: 21, Source: regression_lasso.py

Example 13: remove_foreground_glm

def remove_foreground_glm(
        x, y,
        spatial_mask=None, spectral_mask=None,
        alphas=None, l1_ratio=1.):
    """Summary

    Args:
        x (TYPE): Description
        y (TYPE): Description
        spatial_mask (TYPE, optional): Description
        spectral_mask (TYPE, optional): Description
        alphas (TYPE, optional): Description

    Returns:
        TYPE: Description
    """

    # cast to double and reshape
    x_rs = np.float64(x.reshape((x.shape[0], -1))).T
    y_rs = np.float64(y.flatten())

    if spatial_mask is None:
        spatial_mask_rs = np.ones_like(y_rs, dtype=bool)
    else:
        spatial_mask_rs = spatial_mask.flatten()

    if spectral_mask is None:
        spectral_mask = np.ones(x_rs.shape[1], dtype=bool)

    if alphas is not None:
        alphas = np.atleast_1d(alphas)

    # fit GLM
    if l1_ratio == 1.:
        reg = LassoCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            max_iter=5000
        )
    elif l1_ratio == 0.:
        reg = RidgeCV(
            alphas=alphas,
        )
    else:
        reg = ElasticNetCV(
            positive=True,
            alphas=alphas,
            n_jobs=-1,
            l1_ratio=l1_ratio
        )

    reg.fit(x_rs[spatial_mask_rs][:, spectral_mask], y_rs[spatial_mask_rs])

    y_model = reg.predict(x_rs[:, spectral_mask]).reshape(y.shape)

    glm_coeffs = np.zeros(x_rs.shape[1], dtype=np.float32)
    glm_coeffs[spectral_mask] += reg.coef_

    return y_model, reg, glm_coeffs
Developer: DanielLenz, Project: phd_helpers, Lines: 60, Source: machine_learning.py

Example 14: get_model_per_cluster

def get_model_per_cluster(X, Y):
    model_per_cluster = {}
    for c in X.cluster.unique():    
        X_cluster = X[X.cluster==c]
        Y_true = Y[Y.cluster == c].ALSFRS_slope
        
        regr = LassoCV(cv=5)
        regr.fit(X_cluster, Y_true)

        print('cluster: %d size: %s' % (c, Y_true.shape))
        Y_predict = regr.predict(X_cluster)
        print("\t RMS error (0 is perfect): %.2f" % np.sqrt(np.mean(
            (Y_predict - Y_true) ** 2)))
        residual_SS = ((Y_predict - Y_true) ** 2).sum()
        total_SS = ((Y_true - Y_true.mean()) ** 2).sum()
        print('\t coefficient of determination R^2 = %.2f ' % (1.0 - residual_SS/total_SS))  # same as regr.score(X_cluster, Y_true)
        cov = sum((Y_predict - Y_predict.mean())*(Y_true - Y_true.mean()))
        Y_predict_std = np.sqrt(sum((Y_predict - Y_predict.mean())**2))
        Y_true_std = np.sqrt(sum((Y_true - Y_true.mean())**2))
        print('\t pearson correlation r = %.2f ' % (cov/(Y_predict_std*Y_true_std)))  # same as scipy.stats.pearsonr(Y_predict, Y_true)[0]
        print("3 sample predictions: ", Y_predict[:3])
        model_per_cluster[c] = {"cluster_train_data_means": X_cluster.mean(), "model" : regr}
    return model_per_cluster
Developer: ihadanny, Project: turbulence-z-team, Lines: 23, Source: modeling_funcs.py
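The hand-rolled statistics above (RMSE, R^2, Pearson r) have direct library equivalents, as the inline comments hint; a small self-contained sketch (ours, on synthetic data) for comparison:

import numpy as np
from scipy.stats import pearsonr
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error, r2_score

rng = np.random.RandomState(0)
X = rng.randn(100, 4)
Y_true = X @ np.array([1.5, 0.0, -2.0, 0.5]) + rng.randn(100) * 0.1

regr = LassoCV(cv=5).fit(X, Y_true)
Y_predict = regr.predict(X)

print("RMS error: %.2f" % np.sqrt(mean_squared_error(Y_true, Y_predict)))
print("R^2 = %.2f" % r2_score(Y_true, Y_predict))            # same as regr.score(X, Y_true)
print("pearson r = %.2f" % pearsonr(Y_predict, Y_true)[0])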

Example 15: lassocv_n_random_lasso

def lassocv_n_random_lasso(X, y, n_iter = 30, test_size = 0.2,
                           max_iter = 50000, n_resampling = 2000):
    # find a good alpha using cv
    # (legacy sklearn.cross_validation-style ShuffleSplit signature; RandomizedLasso
    #  below was removed in later scikit-learn releases)
    ss = ShuffleSplit(X.shape[0], n_iter, test_size)
    reg = LassoCV(normalize = True, cv = ss, max_iter = max_iter)
    reg.fit(X, y)
    reg = RandomizedLasso(alpha = reg.alpha_,
                          n_resampling = n_resampling,
                          max_iter = max_iter, normalize = True)
    reg.fit(X, y)
    rank = reg.scores_.argsort()[::-1]
    return (rank, reg.scores_[rank])
Developer: lzlarryli, Project: limelight, Lines: 12, Source: parameter_importance.py


Note: The sklearn.linear_model.LassoCV class examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use are subject to each project's license. Please do not republish without permission.