本文整理汇总了Python中sklearn.linear_model.RidgeClassifier类的典型用法代码示例。如果您正苦于以下问题:Python RidgeClassifier类的具体用法?Python RidgeClassifier怎么用?Python RidgeClassifier使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了RidgeClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train_and_predict_m8
def train_and_predict_m8 (train, test, labels) :
## Apply basic concatenation + stemming
trainData, testData = stemmer_clean (train, test, stemmerEnableM7, stemmer_type = 'porter')
## TF-IDF transform with sub-linear TF and stop-word removal
tfv = TfidfVectorizer(min_df = 5, max_features = None, strip_accents = 'unicode', analyzer = 'word', token_pattern = r'\w{1,}', ngram_range = (1, 5), smooth_idf = 1, sublinear_tf = 1, stop_words = ML_STOP_WORDS)
tfv.fit(trainData)
X = tfv.transform(trainData)
X_test = tfv.transform(testData)
## Create the classifier
print ("Fitting Ridge Classifer...")
clf = RidgeClassifier(class_weight = 'auto', alpha = 1, normalize = True)
## Create a parameter grid to search for best parameters for everything in the pipeline
param_grid = {'alpha' : [0.1, 0.3, 1, 3, 10], 'normalize' : [True, False]}
## Predict model with best parameters optimized for quadratic_weighted_kappa
if (gridSearch) :
model = perform_grid_search (clf, param_grid, X, labels)
pred = model.predict(X_test)
else :
clf.fit(X, labels)
pred = clf.predict(X_test)
return pred
示例2: retrain_models
def retrain_models(username):
train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username)
b_train_x = []
b_train_y = numpy.concatenate([body_y, train_y])
for msg in (body_x + train_x):
b_train_x.append(extract_body_features(msg))
body_vec = TfidfVectorizer(norm="l2")
b_train_x = body_vec.fit_transform(b_train_x)
h_train_x = []
h_train_y = numpy.concatenate([head_y, train_y])
for msg in (head_x + train_x):
h_train_x.append(extract_header_features(msg))
head_vec = DictVectorizer()
h_train_x = head_vec.fit_transform(h_train_x)
body_model = LinearSVC(loss='l2', penalty="l2", dual=False, tol=1e-3)
head_model = RidgeClassifier(tol=1e-2, solver="lsqr")
body_model.fit(b_train_x, b_train_y)
head_model.fit(h_train_x, h_train_y)
print("Finished training models for "+username+"...")
store_models(username, body_vec, body_model, head_vec, head_model)
示例3: run
def run(input_train, input_test, output_name):
"""
Takes a file path as input, a file path as output, and produces a sorted csv of
item IDs for Kaggle submission
-------
input_train : 'full path of the training file'
input_test : 'full path of the testing file'
output_name : 'full path of the output file'
"""
data = pd.read_table(input_train)
test = pd.read_table(input_test)
testItemIds = test.itemid
response = data.is_blocked
dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
pretestdummies = pd.get_dummies(test.subcategory)
testdummies = sparse.csc_matrix(pretestdummies.drop(['Растения', 'Товары для компьютера'],axis=1))
words = np.array(data.description,str)
testwords = np.array(test.description,str)
del data, test
vect = text.CountVectorizer(decode_error = u'ignore', strip_accents='unicode', ngram_range=(1,2))
corpus = np.concatenate((words, testwords))
vect.fit(corpus)
counts = vect.transform(words)
features = sparse.hstack((dummies,counts))
clf = RidgeClassifier()
clf.fit(features, response)
testcounts = vect.transform(testwords)
testFeatures = sparse.hstack((testdummies,testcounts))
predicted_scores = clf.predict_proba(testFeatures).T[1]
f = open(output_name,'w')
f.write("id\n")
for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse = True):
f.write("%d\n" % (item_id))
f.close()
示例4: validate
def validate(input_train, rows=True, test=0.25):
"""
Takes file as input and returns classification report, average precision, and
AUC for a bigram model. By default, loads all rows of a dataset, trains on .75,
and tests on .25.
----
input_train : 'full path of the file you are loading'
rows : True - loads all rows; insert an int for specific number of rows
test : float proportion of dataset used for testing
"""
if rows == True:
data = pd.read_table(input_train)
else:
data = pd.read_table(input_train, nrows = rows)
response = data.is_blocked
dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
words = np.array(data.description,str)
del data
vect = text.CountVectorizer(decode_error = u'ignore',strip_accents='unicode',ngram_range=(1,2))
counts = vect.fit_transform(words)
features = sparse.hstack((dummies,counts))
features_train, features_test, target_train, target_test = train_test_split(features, response, test_size = test)
clf = RidgeClassifier()
clf.fit(features_train, target_train)
prediction = clf.predict(features_test)
return classification_report(target_test, prediction), average_precision_score(target_test, prediction), roc_auc_score(target_test, prediction)
示例5: test_default_configuration_classify
def test_default_configuration_classify(self):
for i in range(2):
X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
make_sparse=False)
configuration_space = ExtraTreesPreprocessor.get_hyperparameter_search_space()
default = configuration_space.get_default_configuration()
preprocessor = ExtraTreesPreprocessor(random_state=1,
**{hp_name: default[hp_name]
for hp_name in default})
preprocessor.fit(X_train, Y_train)
X_train_trans = preprocessor.transform(X_train)
X_test_trans = preprocessor.transform(X_test)
# fit a classifier on top
classifier = RidgeClassifier()
predictor = classifier.fit(X_train_trans, Y_train)
predictions = predictor.predict(X_test_trans)
accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
self.assertAlmostEqual(accuracy, 0.87310261080752882, places=2)
示例6: test_default_configuration_classify
def test_default_configuration_classify(self):
for i in range(5):
X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
make_sparse=False)
configuration_space = KernelPCA.get_hyperparameter_search_space()
default = configuration_space.get_default_configuration()
preprocessor = KernelPCA(random_state=1,
**{hp_name: default[hp_name] for hp_name in
default if default[hp_name] is not None})
preprocessor.fit(X_train, Y_train)
X_train_trans = preprocessor.transform(X_train)
X_test_trans = preprocessor.transform(X_test)
# fit a classifier on top
classifier = RidgeClassifier()
predictor = classifier.fit(X_train_trans, Y_train)
predictions = predictor.predict(X_test_trans)
accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
self.assertAlmostEqual(accuracy, 0.096539162112932606)
示例7: test_default_configuration_classify
def test_default_configuration_classify(self):
for i in range(2):
X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
make_sparse=True)
configuration_space = TruncatedSVD.get_hyperparameter_search_space()
default = configuration_space.get_default_configuration()
preprocessor = TruncatedSVD(random_state=1,
**{hp_name: default[hp_name]
for hp_name in
default if default[
hp_name] is not None})
preprocessor.fit(X_train, Y_train)
X_train_trans = preprocessor.transform(X_train)
X_test_trans = preprocessor.transform(X_test)
# fit a classifier on top
classifier = RidgeClassifier()
predictor = classifier.fit(X_train_trans, Y_train)
predictions = predictor.predict(X_test_trans)
accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
self.assertAlmostEqual(accuracy, 0.44201578627808136, places=2)
示例8: get_optimal_blend_weigth
def get_optimal_blend_weigth(exp_, best_param_,
folder, fname, model_fname):
clf = RidgeClassifier()
X_test, y_test = exp_.get_test_data()
clf.set_params(**best_param_)
clf.fit(X_test, y_test)
# dump2csv optimal linear weight
names = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
coefs = np.append(clf.intercept_, clf.coef_).astype(np.float64)
optimal_linear_weight = pd.DataFrame(coefs.reshape(1,len(coefs)), columns=names)
optimal_linear_weight.to_csv(os.path.join(Config.get_string('data.path'),
folder,
fname), index=False)
# dump2cpkle for ridge model
model_fname = os.path.join(Config.get_string('data.path'), folder, model_fname)
with gzip.open(model_fname, 'wb') as gf:
cPickle.dump(clf, gf, cPickle.HIGHEST_PROTOCOL)
return True
示例9: Predict
def Predict():
print('\nThere are %d new deals') % n_test
# Using the KNN classifier
clf_KNN = KNeighborsClassifier(n_neighbors=3) # KNN doesnot work even if k has been tuned
#clf_KNN = KNeighborsClassifier(n_neighbors=7)
#clf_KNN = KNeighborsClassifier(n_neighbors=11)
clf_KNN.fit(Corpus_train, Y_train)
Y_pred_KNN = clf_KNN.predict(Corpus_test)
print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')
# Using the SVM classifier
clf_SVM = svm.SVC()
clf_SVM.fit(Corpus_train, Y_train)
Y_pred_SVM = clf_SVM.predict(Corpus_test)
print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')
# Using the Ridge classifier
clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
#clf_RC = RidgeClassifier(tol=0.1, solver="lsqr")
clf_RC.fit(Corpus_train, Y_train)
Y_pred_RC = clf_RC.predict(Corpus_test)
print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')
# won't consider Random Forests or Decision Trees beacause they work bad for high sparse dimensions
# Using the Multinomial Naive Bayes classifier
# I expect that this MNB classifier will do the best since it is designed for occurrence counts features
#clf_MNB = MultinomialNB(alpha=0.01) #smoothing parameter = 0.01 is worse than 0.1
clf_MNB = MultinomialNB(alpha=0.1)
#clf_MNB = MultinomialNB(alpha=0.3) #a big smoothing rate doesnot benefit the model
#clf_MNB = MultinomialNB(alpha=0.2) #or alpha = 0.05 can generate the best outcome
clf_MNB.fit(Corpus_train, Y_train)
Y_pred_MNB = clf_MNB.predict(Corpus_test)
print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
示例10: get_classifier
def get_classifier(classifier):
if classifier["name"] == 'linear-ridge':
c = RidgeClassifier()
elif classifier["name"] == 'SVC':
c = SVC()
elif classifier["name"] == "l2-SVC":
c = L2KernelClassifier()
elif classifier["name"] == "fredholm":
c = L2FredholmClassifier()
elif classifier["name"] == "TSVM":
c = SVMLight()
elif classifier["name"] == "Lap-RLSC":
c = LapRLSC()
elif classifier["name"] == "fred_kernel_appr":
c = FredholmKernelApprClassifier()
else:
raise NameError('Not existing classifier: ' + classifier["name"] + '.')
c.set_params(**classifier["params"])
return c
示例11: KFold
# N: number for training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model
#
# Setup 10 fold cross validation
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)
# set number of neighbors for kNN
n_neighb = 13
# Brute-force implementation
clf_bNB = BernoulliNB(alpha=.01)
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=1000, dual=False, tol=1e-3)
clf_SVC = SVC(C=1024, kernel='rbf', degree=3, gamma=0.001, probability=True)
###############################################################################
# Stacking
#
# initialize empty y and z
print 'X_den shape: ', X_den.shape
print 'y shape: ', y.shape
n_categories = len(set(y))
z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
示例12: main
def main():
startCol = 0
endCol = 50 # max = 1775
train = csv_io.read_data("../Data/train.csv")
target = [x[0] for x in train][1:3000]
targetTest = [x[0] for x in train][3001:]
trainTest = [x[startCol+1:endCol+1] for x in train][3001:]
test = csv_io.read_data("../Data/test.csv")
test = [x[startCol:endCol] for x in test]
train = [x[startCol+1:endCol+1] for x in train][1:3000]
fo = open("knn_stats.txt", "a+")
rf = RidgeClassifier(alpha=0.01, fit_intercept=True, normalize=False, copy_X=True, tol=0.001)
rf.fit(train, target)
prob = rf.predict(trainTest) # changed from test
result = 100
probSum = 0
for i in range(0, len(prob)):
probX = prob[i] # [1]
if ( probX > 0.7):
probX = 0.7;
if ( probX < 0.3):
probX = 0.3;
print i, probSum, probX, target[i]
print target[i]*log(probX), (1-target[i])*log(1-probX)
probSum += targetTest[i]*log(probX)+(1-targetTest[i])*log(1-probX)
#print probSum
#print len(prob)
#print "C: ", 10**C, " gamma: " ,2**g
print -probSum/len(prob)
if ( -probSum/len(prob) < result ):
result = -probSum/len(prob)
predicted_probs = rf.predict(test) # was test
predicted_probs = ["%f" % x for x in predicted_probs]
csv_io.write_delimited_file("../Submissions/knn.csv", predicted_probs)
print "Generated Data!!"
#fo.write(str(5) + str(5)+ str(5));
fo.close()
#csv_io.write_delimited_file("../Submissions/rf_benchmark_test2.csv", predicted_probs)
#predicted_probs = rf.predict_proba(train) # changed from test
#predicted_probs = ["%f" % x[1] for x in predicted_probs]
#predicted_probs = rf.predict(train) # changed from test
#predicted_probs = ["%f" % x for x in predicted_probs]
#csv_io.write_delimited_file("../Submissions/rf_benchmark_train2.csv", predicted_probs)
var = raw_input("Enter to terminate.")
示例13: KFold
# Notation:
# N: number for training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model
#
# Setup 10 fold cross validation
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)
# set number of neighbors for kNN
n_neighb = 19
# Brute-force implementation
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
clf_SVC = SVC(C=32, gamma=0.0625)
# clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")
# empty ndarrays for predication results z_kn
z_mNB = np.array([], dtype=np.int32)
z_kNN = np.array([], dtype=np.int32)
z_ridge = np.array([], dtype=np.int32)
z_lSVC = np.array([], dtype=np.int32)
z_SVC = np.array([], dtype=np.int32)
###############################################################################
# Stacking
#
示例14: KFold
# N: number for training examples; K: number of models in level 0
# X: feature matrix; y: result array; z_k: prediction result array for k's model
#
# Setup 10 fold cross validation
fold_num = 10
kf = KFold(n_samples, k=fold_num, indices=True)
# set number of neighbors for kNN
n_neighb = 19
# Brute-force implementation
clf_bNB = BernoulliNB(alpha=.01)
clf_mNB = MultinomialNB(alpha=.01)
clf_kNN = KNeighborsClassifier(n_neighbors=n_neighb)
clf_ridge = RidgeClassifier(tol=1e-1)
clf_lSVC = LinearSVC(loss='l2', penalty='l2', C=0.5, dual=False, tol=1e-3)
clf_SVC = SVC(C=32, gamma=0.0625, probability=True)
# clf_SGD = SGDClassifier(alpha=.0001, n_iter=50, penalty="l2")
###############################################################################
# Stacking
#
# initialize empty y and z
n_categories = len(set(y))
z = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=float)
# z = np.zeros( (n_samples, n_categories) , dtype=float)
# Test for 10 rounds using the results from 10 fold cross validations
for i, (train_index, test_index) in enumerate(kf):
示例15: train_test_split
#!/usr/bin/env python
"""
Ridge regression for Avito
"""
__author__ = "deniederhut"
__license__ = "GPL"
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
data = pd.read_table('/Users/dillonniederhut/Desktop/avito_train.tsv',nrows=100000)
#replace with file path to your training data
features = pd.get_dummies(data.subcategory)
features_train, features_test, target_train, target_test =\
train_test_split(features, data.is_blocked, test_size = 0.25)
ridge = RidgeClassifier()
ridge.fit(features_train, target_train)
prediction = np.round(ridge.predict(features_test))
print classification_report(target_test, prediction)
print average_precision_score(target_test, prediction)
print roc_auc_score(target_test, prediction)