当前位置: 首页>>代码示例>>Python>>正文


Python linear_model.RidgeClassifier类代码示例

本文整理汇总了Python中sklearn.linear_model.RidgeClassifier的典型用法代码示例。如果您正苦于以下问题：Python RidgeClassifier类的具体用法？Python RidgeClassifier怎么用？Python RidgeClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了RidgeClassifier类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_and_predict_m8

def train_and_predict_m8 (train, test, labels) :
    """Predict labels for ``test`` with a TF-IDF + RidgeClassifier model.

    The train/test data are first passed through ``stemmer_clean`` (porter
    stemmer), then vectorised with a word-level TF-IDF using 1- to 5-grams.
    When the module-level ``gridSearch`` flag is truthy the classifier is
    tuned through ``perform_grid_search``; otherwise it is fitted directly
    with its initial parameters.

    Returns whatever ``predict`` yields for the transformed test data.
    """
    ## Basic concatenation + stemming
    train_docs, test_docs = stemmer_clean (train, test, stemmerEnableM7, stemmer_type = 'porter')

    ## TF-IDF with sub-linear TF and stop-word removal; fitted on the training text only
    vectorizer = TfidfVectorizer(
        min_df = 5,
        max_features = None,
        strip_accents = 'unicode',
        analyzer = 'word',
        token_pattern = r'\w{1,}',
        ngram_range = (1, 5),
        smooth_idf = 1,
        sublinear_tf = 1,
        stop_words = ML_STOP_WORDS,
    )
    vectorizer.fit(train_docs)
    train_matrix = vectorizer.transform(train_docs)
    test_matrix = vectorizer.transform(test_docs)

    ## Base classifier
    print ("Fitting Ridge Classifer...")
    ridge = RidgeClassifier(class_weight = 'auto', alpha = 1, normalize = True)

    ## Without grid search: fit the base classifier as-is and predict
    if not gridSearch :
        ridge.fit(train_matrix, labels)
        return ridge.predict(test_matrix)

    ## With grid search: tune alpha / normalize, then predict with the tuned model
    search_space = {'alpha' : [0.1, 0.3, 1, 3, 10], 'normalize' : [True, False]}
    tuned = perform_grid_search (ridge, search_space, train_matrix, labels)
    return tuned.predict(test_matrix)
开发者ID:sathishrvijay,项目名称:Kaggle-CrowdFlowerSRR,代码行数:25,代码来源:classifier.py

示例2: retrain_models

def retrain_models(username):
    """Retrain and store the body and header models for ``username``.

    Data comes from ``model_retriever.retrieve_data_db``. The body model is a
    LinearSVC over TF-IDF features of ``extract_body_features``; the header
    model is a RidgeClassifier over DictVectorizer features of
    ``extract_header_features``. Both vectorizers and both fitted models are
    passed to ``store_models``.

    The original body mixed tab and space indentation (a TabError on
    Python 3); indentation is now consistent.
    """
    train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username)

    # Body model: body-specific samples followed by the shared training samples.
    b_train_y = numpy.concatenate([body_y, train_y])
    b_train_x = [extract_body_features(msg) for msg in (body_x + train_x)]

    body_vec = TfidfVectorizer(norm="l2")
    b_train_x = body_vec.fit_transform(b_train_x)

    # Header model: header-specific samples followed by the shared training samples.
    h_train_y = numpy.concatenate([head_y, train_y])
    h_train_x = [extract_header_features(msg) for msg in (head_x + train_x)]

    head_vec = DictVectorizer()
    h_train_x = head_vec.fit_transform(h_train_x)

    body_model = LinearSVC(loss='l2', penalty="l2", dual=False, tol=1e-3)
    head_model = RidgeClassifier(tol=1e-2, solver="lsqr")

    body_model.fit(b_train_x, b_train_y)
    head_model.fit(h_train_x, h_train_y)

    print("Finished training models for "+username+"...")

    store_models(username, body_vec, body_model, head_vec, head_model)
开发者ID:dylanrhodes,项目名称:sigma,代码行数:30,代码来源:offline_updater.py

示例3: run

def run(input_train, input_test, output_name):
    """
    Train a bigram bag-of-words RidgeClassifier and write a Kaggle submission.

    The output is a one-column file (header ``id``) of test item IDs ordered
    from the highest to the lowest classifier score.
    -------
    input_train : 'full path of the training file'
    input_test : 'full path of the testing file'
    output_name : 'full path of the output file'
    """

    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    pretestdummies = pd.get_dummies(test.subcategory)
    # NOTE(review): these two subcategories are dropped from the test dummies,
    # presumably because they do not occur in the training data - confirm.
    testdummies = sparse.csc_matrix(pretestdummies.drop(['Растения', 'Товары для компьютера'],axis=1))
    words = np.array(data.description,str)
    testwords = np.array(test.description,str)
    del data, test  # release the raw frames before building the sparse matrices

    # Vocabulary is fitted on train + test text together.
    vect = text.CountVectorizer(decode_error = u'ignore', strip_accents='unicode', ngram_range=(1,2))
    corpus = np.concatenate((words, testwords))
    vect.fit(corpus)
    counts = vect.transform(words)
    features = sparse.hstack((dummies,counts))

    clf = RidgeClassifier()
    clf.fit(features, response)

    testcounts = vect.transform(testwords)
    testFeatures = sparse.hstack((testdummies,testcounts))
    # RidgeClassifier has no predict_proba; decision_function gives the
    # signed confidence score used here for ranking.
    predicted_scores = clf.decision_function(testFeatures)

    ranked = sorted(zip(predicted_scores, testItemIds), reverse = True)
    with open(output_name,'w') as f:
        f.write("id\n")
        for _, item_id in ranked:
            f.write("%d\n" % (item_id))
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:35,代码来源:ridge_wordbag.py

示例4: validate

def validate(input_train, rows=True, test=0.25):
    """
    Takes file as input and returns classification report, average precision, and
    AUC for a bigram model. By default, loads all rows of a dataset, trains on .75,
    and tests on .25. All three metrics are computed from the hard class
    predictions returned by ``clf.predict``.
    ----
    input_train : 'full path of the file you are loading'
    rows : True - loads all rows; insert an int for specific number of rows
    test : float proportion of dataset used for testing
    """
    # Identity check on purpose: ``1 == True`` in Python, so an equality test
    # would treat ``rows=1`` as "load everything".
    if rows is True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows = rows)
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description,str)
    del data  # release the raw frame before building the sparse matrices
    vect = text.CountVectorizer(decode_error = u'ignore',strip_accents='unicode',ngram_range=(1,2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies,counts))
    features_train, features_test, target_train, target_test = train_test_split(features, response, test_size = test)
    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)
    return classification_report(target_test, prediction), average_precision_score(target_test, prediction), roc_auc_score(target_test, prediction)
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:26,代码来源:ridge_wordbag.py

示例5: test_default_configuration_classify

    def test_default_configuration_classify(self):
        """Check RidgeClassifier accuracy on ExtraTreesPreprocessor output (default config)."""
        for _ in range(2):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=False)

            # Instantiate the preprocessor with every default hyperparameter.
            search_space = ExtraTreesPreprocessor.get_hyperparameter_search_space()
            default = search_space.get_default_configuration()
            default_kwargs = {hp_name: default[hp_name] for hp_name in default}
            preprocessor = ExtraTreesPreprocessor(random_state=1, **default_kwargs)

            preprocessor.fit(X_train, Y_train)
            train_features = preprocessor.transform(X_train)
            test_features = preprocessor.transform(X_test)

            # Fit a classifier on top of the transformed features.
            ridge = RidgeClassifier()
            fitted = ridge.fit(train_features, Y_train)
            predicted = fitted.predict(test_features)
            accuracy = sklearn.metrics.accuracy_score(predicted, Y_test)
            self.assertAlmostEqual(accuracy, 0.87310261080752882, places=2)
开发者ID:dongzhixiang,项目名称:paramsklearn,代码行数:19,代码来源:test_extra_trees.py

示例6: test_default_configuration_classify

    def test_default_configuration_classify(self):
        """Check RidgeClassifier accuracy on KernelPCA output (default config)."""
        for _ in range(5):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=False)

            # Pass only the default hyperparameters that are actually set.
            search_space = KernelPCA.get_hyperparameter_search_space()
            default = search_space.get_default_configuration()
            default_kwargs = {hp_name: default[hp_name]
                              for hp_name in default
                              if default[hp_name] is not None}
            preprocessor = KernelPCA(random_state=1, **default_kwargs)

            preprocessor.fit(X_train, Y_train)
            train_features = preprocessor.transform(X_train)
            test_features = preprocessor.transform(X_test)

            # Fit a classifier on top of the transformed features.
            ridge = RidgeClassifier()
            fitted = ridge.fit(train_features, Y_train)
            predicted = fitted.predict(test_features)
            accuracy = sklearn.metrics.accuracy_score(predicted, Y_test)
            self.assertAlmostEqual(accuracy, 0.096539162112932606)
开发者ID:Allen1203,项目名称:auto-sklearn,代码行数:19,代码来源:test_kernel_pca.py

示例7: test_default_configuration_classify

    def test_default_configuration_classify(self):
        """Check RidgeClassifier accuracy on TruncatedSVD output (default config, sparse input)."""
        for _ in range(2):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=True)

            # Pass only the default hyperparameters that are actually set.
            search_space = TruncatedSVD.get_hyperparameter_search_space()
            default = search_space.get_default_configuration()
            default_kwargs = {hp_name: default[hp_name]
                              for hp_name in default
                              if default[hp_name] is not None}
            preprocessor = TruncatedSVD(random_state=1, **default_kwargs)

            preprocessor.fit(X_train, Y_train)
            train_features = preprocessor.transform(X_train)
            test_features = preprocessor.transform(X_test)

            # Fit a classifier on top of the transformed features.
            ridge = RidgeClassifier()
            fitted = ridge.fit(train_features, Y_train)
            predicted = fitted.predict(test_features)
            accuracy = sklearn.metrics.accuracy_score(predicted, Y_test)
            self.assertAlmostEqual(accuracy, 0.44201578627808136, places=2)
开发者ID:dongzhixiang,项目名称:paramsklearn,代码行数:21,代码来源:test_truncatedSVD.py

示例8: get_optimal_blend_weigth

def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    """Fit a RidgeClassifier with ``best_param_`` and persist its weights and model.

    The classifier is fitted on the data returned by ``exp_.get_test_data()``.
    The intercept and coefficients are written as a single-row CSV to
    ``<data.path>/<folder>/<fname>``, and the fitted model is pickled
    (gzip-compressed) to ``<data.path>/<folder>/<model_fname>``.

    Always returns True.
    """
    ridge = RidgeClassifier()
    X_test, y_test = exp_.get_test_data()
    ridge.set_params(**best_param_)
    ridge.fit(X_test, y_test)

    # One-row table: 'intercept' followed by one column per feature.
    header = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
    weights = np.append(ridge.intercept_, ridge.coef_).astype(np.float64)
    weight_row = weights.reshape(1, len(weights))
    weight_table = pd.DataFrame(weight_row, columns=header)
    weights_path = os.path.join(Config.get_string('data.path'), folder, fname)
    weight_table.to_csv(weights_path, index=False)

    # Gzip-pickled copy of the fitted ridge model.
    model_path = os.path.join(Config.get_string('data.path'), folder, model_fname)
    with gzip.open(model_path, 'wb') as gz_out:
        cPickle.dump(ridge, gz_out, cPickle.HIGHEST_PROTOCOL)

    return True
开发者ID:Quasi-quant2010,项目名称:Stacking,代码行数:21,代码来源:run_ridge_grid_search.py

示例9: Predict

def Predict():
    """Fit four classifiers on the training corpus and report each one's rate.

    Uses the module-level ``Corpus_train``/``Y_train`` for fitting and
    ``Corpus_test``/``Y_test``/``n_test`` for evaluation; results are reported
    through ``print_rate``. Returns None.
    """
    # The % formatting must be applied to the string itself; applying it to
    # the result of print(...) raises TypeError on Python 3.
    print('\nThere are %d new deals' % n_test)

    # Random Forests / Decision Trees are not considered because they work
    # badly for high sparse dimensions.
    candidates = [
        # KNN does not work even if k has been tuned (7 and 11 were also tried).
        (KNeighborsClassifier(n_neighbors=3), 'KNNClassifier'),
        (svm.SVC(), 'SVMClassifier'),
        # tol=0.1 was also tried for the ridge classifier.
        (RidgeClassifier(tol=0.01, solver="lsqr"), 'RidgeClassifier'),
        # Multinomial Naive Bayes was expected to do best since it is designed
        # for occurrence-count features. Tuning notes: alpha=0.01 is worse
        # than 0.1; a big smoothing rate (0.3) does not help; 0.2 or 0.05 can
        # generate the best outcome.
        (MultinomialNB(alpha=0.1), 'MultinomialNBClassifier'),
    ]

    for clf, label in candidates:
        clf.fit(Corpus_train, Y_train)
        Y_pred = clf.predict(Corpus_test)
        print_rate(Y_test, Y_pred, n_test, label)
开发者ID:albingrace,项目名称:QianWan,代码行数:36,代码来源:test_3.py

示例10: get_classifier

def get_classifier(classifier):
    """Build the classifier described by ``classifier``.

    ``classifier["name"]`` selects the estimator class and
    ``classifier["params"]`` is forwarded to its ``set_params``.

    Raises NameError when the name matches none of the known classifiers.
    """
    requested = classifier["name"]

    # Factories are wrapped in lambdas so a class name is only looked up
    # when its entry is actually selected.
    factories = (
        ('linear-ridge', lambda: RidgeClassifier()),
        ('SVC', lambda: SVC()),
        ("l2-SVC", lambda: L2KernelClassifier()),
        ("fredholm", lambda: L2FredholmClassifier()),
        ("TSVM", lambda: SVMLight()),
        ("Lap-RLSC", lambda: LapRLSC()),
        ("fred_kernel_appr", lambda: FredholmKernelApprClassifier()),
    )

    for known_name, build in factories:
        if requested == known_name:
            c = build()
            break
    else:
        raise NameError('Not existing classifier: ' + classifier["name"] + '.')

    c.set_params(**classifier["params"])
    return c
开发者ID:queqichao,项目名称:FredholmLearning,代码行数:19,代码来源:classifier_help.py

示例11: KFold

# Notation used in this script:
# N: number of training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for the k-th model
#

# Set up 10-fold cross validation over n_samples (defined earlier in the script)
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)

# Number of neighbors for kNN
n_neighb = 13

# Level-0 classifiers, instantiated one by one ("brute-force" implementation)
clf_bNB     = BernoulliNB(alpha=.01)
clf_mNB     = MultinomialNB(alpha=.01)
clf_kNN     = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge   = RidgeClassifier(tol=1e-1)
clf_SGD     = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")
clf_lSVC    = LinearSVC(loss='l2', penalty='l2', C=1000, dual=False, tol=1e-3)
clf_SVC     = SVC(C=1024, kernel='rbf', degree=3, gamma=0.001, probability=True)


###############################################################################
# Stacking
#
# Initialize empty y and z

# Report the shapes of the feature matrix and the label array
print 'X_den shape: ', X_den.shape
print 'y shape:     ', y.shape

# Number of distinct labels in y
n_categories = len(set(y))
# z starts as a single 1x14 row of float zeros
# NOTE(review): presumably a seed row that later predictions are stacked under - confirm
z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:31,代码来源:20_ensemble_stacking_prob.py

示例12: main

def main():

    startCol = 0
    endCol = 50  # max = 1775

    train = csv_io.read_data("../Data/train.csv")
    target = [x[0] for x in train][1:3000]
    targetTest = [x[0] for x in train][3001:]
    trainTest = [x[startCol+1:endCol+1] for x in train][3001:]
    test = csv_io.read_data("../Data/test.csv")
    test = [x[startCol:endCol] for x in test]
	
    train = [x[startCol+1:endCol+1] for x in train][1:3000]	
	
    fo = open("knn_stats.txt", "a+")

    rf = RidgeClassifier(alpha=0.01, fit_intercept=True, normalize=False, copy_X=True, tol=0.001) 
	
    rf.fit(train, target)
    prob = rf.predict(trainTest) # changed from test


    result = 100
    probSum = 0
    for i in range(0, len(prob)):
        probX = prob[i] # [1]
        if ( probX > 0.7):
            probX = 0.7;		
        if ( probX < 0.3):
            probX = 0.3;
        print i, probSum, probX, target[i]
        print target[i]*log(probX), (1-target[i])*log(1-probX)
        probSum += targetTest[i]*log(probX)+(1-targetTest[i])*log(1-probX)
	
        #print probSum	
        #print len(prob)	
        #print "C: ", 10**C, " gamma: " ,2**g
        print -probSum/len(prob)
	

	
    if ( -probSum/len(prob) < result ):
        result = -probSum/len(prob)
        predicted_probs = rf.predict(test)  # was test
        predicted_probs = ["%f" % x for x in predicted_probs]
        csv_io.write_delimited_file("../Submissions/knn.csv", predicted_probs)
        print "Generated Data!!"
		
    #fo.write(str(5) + str(5)+ str(5));
		
    fo.close()
		
    #csv_io.write_delimited_file("../Submissions/rf_benchmark_test2.csv", predicted_probs)

    #predicted_probs = rf.predict_proba(train) # changed from test
 
    #predicted_probs = ["%f" % x[1] for x in predicted_probs]
    #predicted_probs = rf.predict(train) # changed from test
    #predicted_probs = ["%f" % x for x in predicted_probs]	
	
    #csv_io.write_delimited_file("../Submissions/rf_benchmark_train2.csv", predicted_probs)
	
	
    var = raw_input("Enter to terminate.")								
开发者ID:mb16,项目名称:Kaggle,代码行数:64,代码来源:ridge.py

示例13: KFold

# Notation:
# N: number of training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for the k-th model
#

# Set up 10-fold cross validation over n_samples (defined earlier in the script)
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)

# Number of neighbors for kNN
n_neighb = 19

# Level-0 classifiers, instantiated one by one ("brute-force" implementation)
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
clf_SVC = SVC(C=32, gamma=0.0625)
# clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

# Empty int32 arrays, one per classifier, for the prediction results z_kn
z_mNB = np.array([], dtype=np.int32)
z_kNN = np.array([], dtype=np.int32)
z_ridge = np.array([], dtype=np.int32)
z_lSVC = np.array([], dtype=np.int32)
z_SVC = np.array([], dtype=np.int32)


###############################################################################
# Stacking
# 
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:31,代码来源:05_Test_stacking_pred.py

示例14: KFold

    # Notation used below:
    # N: number of training examples; K: number of models in level 0
    # X: feature matrix; y: result array; z_k: prediction result array for the k-th model
    #

    # Set up 10-fold cross validation over n_samples
    fold_num = 10
    kf = KFold(n_samples, k=fold_num, indices=True)

    # Number of neighbors for kNN
    n_neighb = 19

    # Level-0 classifiers, instantiated one by one ("brute-force" implementation)
    clf_bNB     = BernoulliNB(alpha=.01)
    clf_mNB     = MultinomialNB(alpha=.01)
    clf_kNN     = KNeighborsClassifier(n_neighbors=n_neighb)
    clf_ridge   = RidgeClassifier(tol=1e-1)
    clf_lSVC    = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
    clf_SVC     = SVC(C=32, gamma=0.0625, probability=True)
    # clf_SGD     = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")

    ###############################################################################
    # Stacking
    #
    # Initialize empty y and z

    # Number of distinct labels in y
    n_categories = len(set(y))
    # z starts as a single 1x16 row of float zeros
    # NOTE(review): presumably a seed row that later predictions are stacked under - confirm
    z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
    # z = np.zeros( (n_samples, n_categories) , dtype=float)

    # Test for 10 rounds using the results from 10 fold cross validations
    for i, (train_index, test_index) in enumerate(kf):
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:31,代码来源:04_01_Ensemble_Stacking_Prob_1010.py

示例15: train_test_split

#!/usr/bin/env python
"""
Ridge regression for Avito
"""
__author__ = "deniederhut"
__license__ = "GPL"
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score

# Load the first 100,000 rows of the training data.
data = pd.read_table('/Users/dillonniederhut/Desktop/avito_train.tsv',nrows=100000)
# Replace with the file path to your training data

# One-hot encode the subcategory column; it is the only feature used here.
features = pd.get_dummies(data.subcategory)
# Hold out 25% of the rows for evaluation; the target is the is_blocked column.
features_train, features_test, target_train, target_test =\
    train_test_split(features, data.is_blocked, test_size = 0.25)

# Fit a RidgeClassifier with default parameters and predict the held-out rows.
ridge = RidgeClassifier()
ridge.fit(features_train, target_train)
prediction = np.round(ridge.predict(features_test))
# Print the classification report, average precision and ROC AUC for the
# rounded predictions.
print classification_report(target_test, prediction)
print average_precision_score(target_test, prediction)
print roc_auc_score(target_test, prediction)
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:27,代码来源:ridge_benchmark.py


注:本文中的sklearn.linear_model.RidgeClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。