本文整理匯總了Python中sklearn.linear_model.RidgeClassifier.predict方法的典型用法代碼示例。如果您正苦於以下問題:Python RidgeClassifier.predict方法的具體用法?Python RidgeClassifier.predict怎麼用?Python RidgeClassifier.predict使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.linear_model.RidgeClassifier的用法示例。
在下文中一共展示了RidgeClassifier.predict方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: train_and_predict_m8
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def train_and_predict_m8(train, test, labels):
    """Fit a RidgeClassifier on TF-IDF features of the cleaned text and predict for ``test``.

    Both inputs are first passed through ``stemmer_clean`` (Porter stemmer),
    then vectorised with a word-level TF-IDF over 1- to 5-grams that is fitted
    on the training text only.  The module-level ``gridSearch`` flag decides
    whether the classifier is fitted directly or tuned through
    ``perform_grid_search``.

    Args:
        train: training data handed to ``stemmer_clean``.
        test: test data handed to ``stemmer_clean``.
        labels: targets the classifier is fitted against.

    Returns:
        The predictions for the test data, as returned by ``predict``.
    """
    # Basic concatenation + stemming (per the original note on this step).
    train_text, test_text = stemmer_clean(
        train, test, stemmerEnableM7, stemmer_type='porter')

    # TF-IDF transform with sub-linear TF and stop-word removal.
    vectorizer = TfidfVectorizer(
        min_df=5,
        max_features=None,
        strip_accents='unicode',
        analyzer='word',
        token_pattern=r'\w{1,}',
        ngram_range=(1, 5),
        smooth_idf=1,
        sublinear_tf=1,
        stop_words=ML_STOP_WORDS,
    )
    vectorizer.fit(train_text)
    train_matrix = vectorizer.transform(train_text)
    test_matrix = vectorizer.transform(test_text)

    print ("Fitting Ridge Classifer...")
    ridge = RidgeClassifier(class_weight='auto', alpha=1, normalize=True)

    if not gridSearch:
        # Plain fit with the defaults chosen above.
        ridge.fit(train_matrix, labels)
        return ridge.predict(test_matrix)

    # Hyper-parameter candidates handed to the grid search; the original note
    # says the search optimises quadratic_weighted_kappa.
    search_space = {
        'alpha': [0.1, 0.3, 1, 3, 10],
        'normalize': [True, False],
    }
    tuned_model = perform_grid_search(ridge, search_space, train_matrix, labels)
    return tuned_model.predict(test_matrix)
示例2: validate
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def validate(input_train, rows=True, test=0.25):
    """
    Takes file as input and returns classification report, average precision, and
    AUC for a bigram model. By default, loads all rows of a dataset, trains on .75,
    and tests on .25.
    ----
    input_train : 'full path of the file you are loading'
    rows : True - loads all rows; insert an int for specific number of rows
    test : float proportion of dataset used for testing

    Returns a 3-tuple: (classification report, average precision score,
    ROC AUC score), all computed from the predicted labels on the test split.
    """
    # Identity check on purpose: ``rows == True`` is also satisfied by
    # ``rows=1`` (bool is an int subclass), which silently loaded the whole
    # file when a single row was requested.
    if rows is True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows=rows)

    response = data.is_blocked
    # One-hot encode the subcategory column and keep it sparse.
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description, str)
    # Drop the frame early; only the three extracted pieces are used below.
    del data

    # Unigram + bigram counts of the description text.
    vect = text.CountVectorizer(decode_error=u'ignore', strip_accents='unicode', ngram_range=(1, 2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies, counts))

    features_train, features_test, target_train, target_test = train_test_split(
        features, response, test_size=test)

    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)

    # NOTE(review): average precision and AUC are computed from hard labels,
    # not decision scores; kept as-is so the returned values do not change.
    return (
        classification_report(target_test, prediction),
        average_precision_score(target_test, prediction),
        roc_auc_score(target_test, prediction),
    )
示例3: Eval
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def Eval(XTrain, YTrain, XTest, YTest, clf, return_predicted_labels=False):
    """
    Fit a classifier on the training data and score its predictions on the
    test data.

    Inputs:
        XTrain - N by D matrix of training data vectors
        YTrain - N by 1 matrix of training class labels
        XTest - M by D matrix of testing data vectors
        YTest - M by 1 matrix of testing class labels
        clf - the classifier
            either a string (case-insensitive) containing one of
            "ridge", "perceptron", "passive aggressive"/"passive-aggressive",
            "linsvm"/"linearsvm"/"linearsvc", "svm"/"svc", "sgd"
            or a classifier instance
            with the methods .fit and .predict
        return_predicted_labels - when true, the predicted labels are
            appended to the returned tuple
    Outputs:
        A tuple containing (in the following order):
            Accuracy
            Overall Precision (micro-averaged)
            Overall Recall (micro-averaged)
            Overall F1 score (micro-averaged)
            Avg. Precision per class
            Avg. Recall per class
            Avg. F1 Score per class
            Precision per class
            Recall per class
            F1 Score per class
            (if return_predicted_labels)
            predicted class labels for each row in XTest
    Raises:
        ValueError - if clf is a string that matches no supported classifier
    """
    if isinstance(clf, str):
        name = clf.lower()
        # Order matters: the linear-SVM spellings must be tested before the
        # generic 'svm'/'svc' substrings that they also contain.
        if 'ridge' in name:
            clf = RidgeClassifier(tol=1e-2, solver="lsqr")
        elif "perceptron" in name:
            clf = Perceptron(n_iter=50)
        elif "passive aggressive" in name or 'passive-aggressive' in name:
            clf = PassiveAggressiveClassifier(n_iter=50)
        elif 'linsvm' in name or 'linearsvm' in name or 'linearsvc' in name:
            clf = LinearSVC()
        elif 'svm' in name or 'svc' in name:
            clf = SVC()
        elif 'sgd' in name:
            clf = SGDClassifier()
        else:
            # Previously an unknown name fell through and failed later with
            # "'str' object has no attribute 'fit'".
            raise ValueError("Unrecognized classifier name: %r" % (clf,))

    clf.fit(XTrain, YTrain)
    YPred = clf.predict(XTest)

    accuracy = sklearn.metrics.accuracy_score(YTest, YPred)
    (overall_precision, overall_recall, overall_f1, support) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred, average='micro')
    (precision_per_class, recall_per_class, f1_per_class, support_per_class) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred)
    avg_precision_per_class = np.mean(precision_per_class)
    avg_recall_per_class = np.mean(recall_per_class)
    avg_f1_per_class = np.mean(f1_per_class)

    results = (accuracy, overall_precision, overall_recall, overall_f1,
               avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
               precision_per_class, recall_per_class, f1_per_class)
    if return_predicted_labels:
        return results + (YPred,)
    return results
示例4: Predict
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def Predict():
    """Fit four classifiers on the training corpus and report each on the test corpus.

    Reads ``Corpus_train``, ``Y_train``, ``Corpus_test``, ``Y_test`` and
    ``n_test`` from the enclosing scope.  Each model's predictions are passed
    to ``print_rate``; nothing is returned.
    """
    # Format first, then print: applying ``%`` to the result of print() (None)
    # raises TypeError on Python 3.
    print('\nThere are %d new deals' % n_test)

    # Using the KNN classifier
    # KNN does not work well even when k is tuned (7 and 11 were also tried).
    clf_KNN = KNeighborsClassifier(n_neighbors=3)
    clf_KNN.fit(Corpus_train, Y_train)
    Y_pred_KNN = clf_KNN.predict(Corpus_test)
    print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

    # Using the SVM classifier
    clf_SVM = svm.SVC()
    clf_SVM.fit(Corpus_train, Y_train)
    Y_pred_SVM = clf_SVM.predict(Corpus_test)
    print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

    # Using the Ridge classifier (tol=0.1 was also tried)
    clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
    clf_RC.fit(Corpus_train, Y_train)
    Y_pred_RC = clf_RC.predict(Corpus_test)
    print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

    # Random Forests and Decision Trees are not considered because they work
    # badly for high sparse dimensions.

    # Using the Multinomial Naive Bayes classifier
    # This one is expected to do best since it is designed for occurrence-count
    # features.  Author's tuning notes: alpha=0.01 is worse than 0.1, a big
    # smoothing rate (0.3) does not help, and 0.2 or 0.05 can give the best
    # outcome.
    clf_MNB = MultinomialNB(alpha=0.1)
    clf_MNB.fit(Corpus_train, Y_train)
    Y_pred_MNB = clf_MNB.predict(Corpus_test)
    print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
示例5:
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Per-fold score accumulators (f1_all / f5_all are not filled in this excerpt).
f1_all = []
f5_all = []
acc_all = []
pre_all = []
rec_all = []

# level 1 evaluation: fit and score the classifier once per fold of kf1
for train_index, test_index in kf1:
    z_train, z_test = z[train_index], z[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(z_train, y_train)
    pred = clf.predict(z_test)

    # metrics
    # accuracy_score replaces metrics.zero_one_score, which was renamed and
    # later removed from scikit-learn; both give the fraction of correct
    # predictions.
    acc_score = metrics.accuracy_score(y_test, pred)
    pre_score = metrics.precision_score(y_test, pred)
    rec_score = metrics.recall_score(y_test, pred)
    acc_all.append(acc_score)
    pre_all.append(pre_score)
    rec_all.append(rec_score)

# put the lists into numpy array for calculating the results
acc_all_array = np.asarray(acc_all)
pre_all_array = np.asarray(pre_all)
rec_all_array = np.asarray(rec_all)
示例6: BernoulliNB
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Densify the feature matrices (``toarray``) before fitting.
X_train = X_train.toarray()
X_test = X_test.toarray()

# Alternative classifiers that were tried:
# clf = BernoulliNB(alpha=.1)
# clf = MultinomialNB(alpha=.01)
# clf = KNeighborsClassifier(n_neighbors=3)
# clf = RandomForestClassifier(n_estimators=20, max_depth=None, min_split=3, random_state=42)
# clf = SGDClassifier(alpha=.01, n_iter=50, penalty="l2")
# clf = LinearSVC(loss='l2', penalty='l2', C=1000, dual=False, tol=1e-3)
clf = RidgeClassifier(tol=1e-1)
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

# Single-argument print(...) produces the same text on Python 2 and 3; the
# former ``print "y : ", y_test`` statements are a SyntaxError on Python 3.
print("%s %s" % ("y : ", y_test))
print("%s %s" % ("pred : ", pred))
print("")

# Disabled: print out top words for each category
# for i, category in enumerate(categories):
#     top = np.argsort(clf.coef_[i, :])[-20:]
#     print("%s: %s" % (category, " ".join(vocabulary[top])))

pre_score = metrics.precision_score(y_test, pred)
示例7: the
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Slice this fold's train/test partitions out of the features, the labels and
# X_den (presumably a dense copy of X -- confirm against its definition).
X_train = X[train_index]
X_test = X[test_index]
y_train = y[train_index]
y_test = y[test_index]
X_den_train = X_den[train_index]
X_den_test = X_den[test_index]

# Feed the models: every level-0 classifier sees the same training partition.
for fold_model in (clf_mNB, clf_kNN, clf_ridge, clf_lSVC, clf_SVC):
    fold_model.fit(X_train, y_train)

# Predictions of each model for this fold's held-out partition.
pred_mNB = clf_mNB.predict(X_test)
pred_kNN = clf_kNN.predict(X_test)
pred_ridge = clf_ridge.predict(X_test)
pred_lSVC = clf_lSVC.predict(X_test)
pred_SVC = clf_SVC.predict(X_test)

# Extend each model's z array with this fold's predictions (np.append with its
# default axis flattens both operands).
z_mNB = np.append(z_mNB, pred_mNB)
z_kNN = np.append(z_kNN, pred_kNN)
z_ridge = np.append(z_ridge, pred_ridge)
z_lSVC = np.append(z_lSVC, pred_lSVC)
z_SVC = np.append(z_SVC, pred_SVC)

# The z's from each model are then put into one 2d matrix: it is the (feature)
# input for level 1, playing the role X plays here.  In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
示例8: classify
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def classify(granularity=10):
    """Train a RidgeClassifier on TF-IDF features and score the test documents.

    Training documents are loaded from
    ``GEOTEXT_HOME/processed_data/<granularity>_clustered/`` and test documents
    from ``GEOTEXT_HOME/processed_data/test``.  Corpus sizes, timings, model
    statistics and the top keywords per class are printed along the way.

    Args:
        granularity: value interpolated into the training directory name.

    NOTE(review): the tail of this function is omitted from this listing, so
    its return value (if any) is not visible here.
    """
    trainDir = path.join(GEOTEXT_HOME, 'processed_data/' + str(granularity).strip() + '_clustered/')
    testDir = path.join(GEOTEXT_HOME, 'processed_data/test')
    data_train = load_files(trainDir, encoding=encoding)
    target = data_train.target
    data_test = load_files(testDir, encoding=encoding)
    categories = data_train.target_names

    def size_mb(docs):
        """Return the total encoded size of ``docs`` in megabytes."""
        return sum(len(s.encode(encoding)) for s in docs) / 1e6

    data_train_size_mb = size_mb(data_train.data)
    data_test_size_mb = size_mb(data_test.data)
    print("%d documents - %0.3fMB (training set)" % (
        len(data_train.data), data_train_size_mb))
    print("%d documents - %0.3fMB (test set)" % (
        len(data_test.data), data_test_size_mb))
    print("%d categories" % len(categories))
    print()

    # split a training set and a test set
    y_train = data_train.target
    y_test = data_test.target

    print("Extracting features from the training dataset using a sparse vectorizer")
    t0 = time()
    vectorizer = TfidfVectorizer(use_idf=True, norm='l2', binary=False, sublinear_tf=True, min_df=2, max_df=1.0, ngram_range=(1, 1), stop_words='english')
    X_train = vectorizer.fit_transform(data_train.data)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_train_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_train.shape)
    print()

    print("Extracting features from the test dataset using the same vectorizer")
    t0 = time()
    X_test = vectorizer.transform(data_test.data)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_test_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_test.shape)
    print()

    # Optional chi-squared feature selection; switched off by default.
    chi = False
    if chi:
        k = 500000
        # Report the actual k (the message used to format a literal 0).
        print("Extracting %d best features by a chi-squared test" % k)
        t0 = time()
        ch2 = SelectKBest(chi2, k=k)
        X_train = ch2.fit_transform(X_train, y_train)
        X_test = ch2.transform(X_test)
        print("done in %fs" % (time() - t0))
        print()

    feature_names = np.asarray(vectorizer.get_feature_names())

    # clf = LinearSVC(loss='l2', penalty='l2', dual=True, tol=1e-3)
    clf = RidgeClassifier(tol=1e-2, solver="auto")
    print('_' * 80)
    print("Training: ")
    print(clf)
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    pred = clf.predict(X_test)
    scores = clf.decision_function(X_test)
    # print() calls, consistent with the rest of the function; the bare
    # ``print x`` statements used here before cannot coexist with print().
    print(scores.shape)
    print(pred.shape)
    test_time = time() - t0
    print("test time: %0.3fs" % test_time)
    # score = metrics.f1_score(y_test, pred)
    # print("f1-score: %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))
        print("top 10 keywords per class:")
        for i, category in enumerate(categories):
            top10 = np.argsort(clf.coef_[i])[-10:]
            print("%s: %s" % (category, " ".join(feature_names[top10])))

    sumMeanDistance = 0
    sumMedianDistance = 0
    distances = []
    confidences = []
    randomConfidences = []
    for i in range(0, len(pred)):
        # The test file's base name is the key into userLocation, whose value
        # is parsed as a "lat,lon" string.
        user = path.basename(data_test.filenames[i])
        location = userLocation[user].split(',')
        lat = float(location[0])
        lon = float(location[1])
        prediction = categories[pred[i]]
        # Confidence: margin of the predicted class's score over the mean
        # score of this sample.
        confidence = scores[i][pred[i]] - mean(scores[i])
        # ......... part of the code is omitted here .........
示例9: the
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Slice this fold's inner train/test partitions out of the training set.
X_train_train = X_train[train_index]
X_train_test = X_train[test_index]
y_train_train = y_train[train_index]
y_train_test = y_train[test_index]
# X_den_train, X_den_test = X_den[train_index], X_den[test_index]

# Feed the models: every level-0 classifier sees the same inner training
# partition.
for fold_model in (clf_mNB, clf_kNN, clf_ridge, clf_lSVC, clf_SVC):
    fold_model.fit(X_train_train, y_train_train)

# Predictions of each model for this fold's held-out partition.
pred_mNB = clf_mNB.predict(X_train_test)
pred_kNN = clf_kNN.predict(X_train_test)
pred_ridge = clf_ridge.predict(X_train_test)
pred_lSVC = clf_lSVC.predict(X_train_test)
pred_SVC = clf_SVC.predict(X_train_test)

# Extend each model's z array with this fold's predictions (np.append with its
# default axis flattens both operands).
z_mNB = np.append(z_mNB, pred_mNB)
z_kNN = np.append(z_kNN, pred_kNN)
z_ridge = np.append(z_ridge, pred_ridge)
z_lSVC = np.append(z_lSVC, pred_lSVC)
z_SVC = np.append(z_SVC, pred_SVC)

# The z's from each model are then put into one 2d matrix: it is the (feature)
# input for level 1, playing the role X plays here.  In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
示例10: KNeighborsClassifier
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
def _fit_and_report(model, label):
    """Fit ``model`` on the training corpus, hand its test predictions to print_rate, return them."""
    model.fit(Corpus_train, Y_train)
    predictions = model.predict(Corpus_test)
    print_rate(Y_test, predictions, n_test, label)
    return predictions


# KNN classifier (created earlier; alternative tried:
# clf_KNN = KNeighborsClassifier(n_neighbors=11))
Y_pred_KNN = _fit_and_report(clf_KNN, 'KNNClassifier')

# SVM classifier
clf_SVM = svm.SVC()
Y_pred_SVM = _fit_and_report(clf_SVM, 'SVMClassifier')

# Ridge classifier (alternative tried: tol=0.1)
clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
Y_pred_RC = _fit_and_report(clf_RC, 'RidgeClassifier')

# Random Forests and Decision Trees are not considered because they work badly
# for high sparse dimensions.

# Multinomial Naive Bayes classifier: expected to do best since it is designed
# for occurrence-count features.  Author's tuning notes: alpha=0.01 is worse
# than 0.1, a big smoothing rate (0.3) does not help, and 0.2 or 0.05 can give
# the best outcome.
clf_MNB = MultinomialNB(alpha=0.1)
Y_pred_MNB = _fit_and_report(clf_MNB, 'MultinomialNBClassifier')
#score = metrics.f1_score(Y_test, Y_pred_MNB)
示例11: time
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Stop the timer started at t0, then report the test matrix shape and the
# elapsed time.
duration = time() - t0
print("n_samples: %d, n_features: %d" % X_test.shape)
print("Done in %fs" % duration)
def writeToDisk(predn,clfname):
    """Write predictions to ``./<clfname>.txt`` as tab-separated rows.

    The file starts with a ``record_id``/``topic`` header line, followed by
    one line per pair taken from ``zip(testID, predn)``, where ``testID`` is
    read from the enclosing scope.

    Args:
        predn: iterable of predicted topics, aligned with ``testID``.
        clfname: classifier name, used as the output file's base name.
    """
    out_path = "./" + clfname + ".txt"
    # The context manager closes the file even when a write raises, which the
    # explicit close() call did not guarantee.
    with open(out_path, 'w') as out_file:
        out_file.write("{}\t{}\n".format("record_id", "topic"))
        for record_id, topic in zip(testID, predn):
            out_file.write("{}\t{}\n".format(record_id, topic))
    print(clfname," output written to disk.")
# Ridge classifier
clf1 = RidgeClassifier(tol=1e-2, solver="lsqr")
clf1.fit(X_train, y_train)
writeToDisk(clf1.predict(X_test), "RidgeClassifier")

# Naive Bayes classifier (multinomial)
clf2 = MultinomialNB(alpha=.01)
clf2.fit(X_train, y_train)
writeToDisk(clf2.predict(X_test), "MultinomialNB")

# Naive Bayes classifier (Bernoulli)
clf3 = BernoulliNB(alpha=.01)
clf3.fit(X_train, y_train)
writeToDisk(clf3.predict(X_test), "BernoulliNB")

# KNeighbors classifier; its predictions stay in ``pred`` (this excerpt ends
# before they are used).
clf4 = KNeighborsClassifier(n_neighbors=10)
clf4.fit(X_train, y_train)
pred = clf4.predict(X_test)
示例12: print
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# The city names are the training documents.
X_train = cityName
print('Creating the vectorizer and chosing a transform (from raw text to feature)')
vect = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
X_train = vect.fit_transform(X_train)

# Two independent models over the same features: one for the city code, one
# for the country code.
cityClass = RidgeClassifier(tol=1e-7)
countryClass = RidgeClassifier(tol=1e-7)
print('Creating a classifier for cities')
cityClass.fit(X_train, cityCode)
print('Creating a classifier for countries')
countryClass.fit(X_train, countryCode)

print('testing the performance')
testCityNames = vect.transform(cityNameTest)
# Each prediction comes from the model trained on the matching target.  The
# two models were crossed over before, so the "city" column held country
# predictions and vice versa.
predictionsCity = cityClass.predict(testCityNames)
predictionsCountry = countryClass.predict(testCityNames)

# One row per test name: country prediction first, then city prediction.
with open('predictions.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    for predCountry, predCity in zip(predictionsCountry, predictionsCity):
        writer.writerow([predCountry, predCity])
示例13: the
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Slice this fold's train/test partitions out of the features, the labels and
# X_den (presumably a dense copy of X -- confirm against its definition).
X_train = X[train_index]
X_test = X[test_index]
y_train = y[train_index]
y_test = y[test_index]
X_den_train = X_den[train_index]
X_den_test = X_den[test_index]

# Feed the models.  kNN is disabled in this variant, and the SVC is the only
# model trained on the X_den partition.
for fold_model in (clf_mNB, clf_ridge, clf_lSVC):
    fold_model.fit(X_train, y_train)
# clf_kNN.fit(X_train, y_train)
clf_SVC.fit(X_den_train, y_train)

# Predictions of each model for this fold's held-out partition.
pred_mNB = clf_mNB.predict(X_test)
# pred_kNN = clf_kNN.predict(X_test)
pred_ridge = clf_ridge.predict(X_test)
pred_lSVC = clf_lSVC.predict(X_test)
pred_SVC = clf_SVC.predict(X_den_test)

# Extend each model's z array with this fold's predictions (np.append with its
# default axis flattens both operands).
z_mNB = np.append(z_mNB, pred_mNB)
# z_kNN = np.append(z_kNN , pred_kNN , axis=None)
z_ridge = np.append(z_ridge, pred_ridge)
z_lSVC = np.append(z_lSVC, pred_lSVC)
z_SVC = np.append(z_SVC, pred_SVC)

# The z's from each model are then put into one 2d matrix: it is the (feature)
# input for level 1, playing the role X plays here.  In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
示例14: train_test_split
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
#!/usr/bin/env python
"""
Ridge classifier baseline for Avito.

Predicts ``is_blocked`` from the one-hot encoded ``subcategory`` column and
prints a classification report, the average precision and the ROC AUC for a
25% hold-out split.
"""
__author__ = "deniederhut"
__license__ = "GPL"
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the cross_validation module.
    from sklearn.cross_validation import train_test_split

# Replace with the file path to your training data.
data = pd.read_table('/Users/dillonniederhut/Desktop/avito_train.tsv', nrows=100000)

features = pd.get_dummies(data.subcategory)
features_train, features_test, target_train, target_test = train_test_split(
    features, data.is_blocked, test_size=0.25)

ridge = RidgeClassifier()
ridge.fit(features_train, target_train)
prediction = np.round(ridge.predict(features_test))

# Single-argument print(...) behaves the same on Python 2 and 3; the former
# print statements are a SyntaxError on Python 3.
print(classification_report(target_test, prediction))
print(average_precision_score(target_test, prediction))
print(roc_auc_score(target_test, prediction))
示例15: len
# 需要導入模塊: from sklearn.linear_model import RidgeClassifier [as 別名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 別名]
# Python 3 form of the loader: ``file()`` no longer exists, and the csv module
# expects a text-mode handle opened with newline=''.  The context manager also
# closes the handle, which was previously leaked.
with open(train_file, newline='') as csv_handle:
    data = list(csv.reader(csv_handle))
data = data[1:]  # remove header
random.shuffle(data)

# First column is the integer label, the remaining columns are the features.
X = np.array([row[1:] for row in data]).astype(float)
Y = np.array([row[0] for row in data]).astype(int)

# Floor division keeps the cutoff an int on Python 3 (a float cannot be used
# as a slice index); the first three quarters are the training split.
train_cutoff = len(data) * 3 // 4
X_train = X[:train_cutoff]
Y_train = Y[:train_cutoff]
X_test = X[train_cutoff:]
Y_test = Y[train_cutoff:]

classifier = RidgeClassifier(normalize=True, alpha=1)
classifier = classifier.fit(X_train, Y_train)
# Score the already-fitted model instead of fitting a second time on the same
# data.  NOTE(review): the label says "error" but score() is called here --
# confirm the intended wording.
print('Training error : %s' % (classifier.score(X_train, Y_train)))

Y_predict = classifier.predict(X_test)
# Count the test rows whose predicted label matches the true one.
equal = sum(1 for predicted, actual in zip(Y_predict, Y_test) if predicted == actual)
print('Accuracy = %s' % (float(equal) / len(Y_predict)))