This article collects typical usage examples of the Python method sklearn.linear_model.RidgeClassifier.fit. If you have been wondering what RidgeClassifier.fit does, how to use it, or where to find working examples, the curated code samples below may help. You can also browse further usage examples of the containing class, sklearn.linear_model.RidgeClassifier.
The following shows 14 code examples of RidgeClassifier.fit, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
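Before the numbered examples, here is a minimal, self-contained sketch of the basic fit/predict cycle; the synthetic dataset from make_classification is assumed purely for illustration:

from sklearn.datasets import make_classification
from sklearn.linear_model import RidgeClassifier

# Toy data: 200 samples, 20 features, binary labels (an assumption for this sketch)
X, y = make_classification(n_samples=200, n_features=20, random_state=0)
clf = RidgeClassifier(alpha=1.0)  # alpha sets the L2 regularization strength
clf.fit(X, y)                     # fit returns the fitted estimator itself
print(clf.score(X, y))            # mean accuracy on the training data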
Example 1: retrain_models
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def retrain_models(username):
    train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username)

    b_train_x = []
    b_train_y = numpy.concatenate([body_y, train_y])
    for msg in (body_x + train_x):
        b_train_x.append(extract_body_features(msg))
    body_vec = TfidfVectorizer(norm="l2")
    b_train_x = body_vec.fit_transform(b_train_x)

    h_train_x = []
    h_train_y = numpy.concatenate([head_y, train_y])
    for msg in (head_x + train_x):
        h_train_x.append(extract_header_features(msg))
    head_vec = DictVectorizer()
    h_train_x = head_vec.fit_transform(h_train_x)

    # loss='l2' from older scikit-learn releases is now spelled 'squared_hinge'
    body_model = LinearSVC(loss='squared_hinge', penalty="l2", dual=False, tol=1e-3)
    head_model = RidgeClassifier(tol=1e-2, solver="lsqr")
    body_model.fit(b_train_x, b_train_y)
    head_model.fit(h_train_x, h_train_y)

    print("Finished training models for " + username + "...")
    store_models(username, body_vec, body_model, head_vec, head_model)
Example 2: train_and_predict_m8
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def train_and_predict_m8(train, test, labels):
    ## Apply basic concatenation + stemming
    trainData, testData = stemmer_clean(train, test, stemmerEnableM7, stemmer_type='porter')

    ## TF-IDF transform with sub-linear TF and stop-word removal
    tfv = TfidfVectorizer(min_df=5, max_features=None, strip_accents='unicode',
                          analyzer='word', token_pattern=r'\w{1,}', ngram_range=(1, 5),
                          smooth_idf=1, sublinear_tf=1, stop_words=ML_STOP_WORDS)
    tfv.fit(trainData)
    X = tfv.transform(trainData)
    X_test = tfv.transform(testData)

    ## Create the classifier
    print("Fitting Ridge Classifier...")
    # Note: class_weight='auto' and the normalize parameter come from older
    # scikit-learn releases; current versions use class_weight='balanced'
    # and no longer accept normalize.
    clf = RidgeClassifier(class_weight='auto', alpha=1, normalize=True)

    ## Create a parameter grid to search for the best parameters for everything in the pipeline
    param_grid = {'alpha': [0.1, 0.3, 1, 3, 10], 'normalize': [True, False]}

    ## Predict with the best parameters, optimized for quadratic_weighted_kappa
    if gridSearch:
        model = perform_grid_search(clf, param_grid, X, labels)
        pred = model.predict(X_test)
    else:
        clf.fit(X, labels)
        pred = clf.predict(X_test)
    return pred
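Note that perform_grid_search above is a project-specific helper whose definition is not shown. A rough, hedged equivalent using scikit-learn's own GridSearchCV might look like the sketch below; the quadratic-weighted-kappa scorer built from cohen_kappa_score is an assumption, chosen to match the comment about quadratic_weighted_kappa, and normalize is omitted because recent scikit-learn removed it:

from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import make_scorer, cohen_kappa_score
from sklearn.model_selection import GridSearchCV

# Score candidates by quadratic weighted kappa, as the example's comment suggests
kappa_scorer = make_scorer(cohen_kappa_score, weights='quadratic')
param_grid = {'alpha': [0.1, 0.3, 1, 3, 10]}
search = GridSearchCV(RidgeClassifier(), param_grid, scoring=kappa_scorer, cv=5)
search.fit(X, labels)                          # X and labels as in the example above
pred = search.best_estimator_.predict(X_test)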
Example 3: run
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def run(input_train, input_test, output_name):
    """
    Takes a file path as input, a file path as output, and produces a sorted csv of
    item IDs for Kaggle submission.
    -------
    input_train : full path of the training file
    input_test : full path of the testing file
    output_name : full path of the output file
    """
    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    pretestdummies = pd.get_dummies(test.subcategory)
    testdummies = sparse.csc_matrix(pretestdummies.drop(['Растения', 'Товары для компьютера'], axis=1))
    words = np.array(data.description, str)
    testwords = np.array(test.description, str)
    del data, test

    vect = text.CountVectorizer(decode_error='ignore', strip_accents='unicode', ngram_range=(1, 2))
    corpus = np.concatenate((words, testwords))
    vect.fit(corpus)
    counts = vect.transform(words)
    features = sparse.hstack((dummies, counts))

    clf = RidgeClassifier()
    clf.fit(features, response)
    testcounts = vect.transform(testwords)
    testFeatures = sparse.hstack((testdummies, testcounts))
    # RidgeClassifier has no predict_proba; rank items by decision_function instead
    predicted_scores = clf.decision_function(testFeatures)

    with open(output_name, 'w') as f:
        f.write("id\n")
        for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse=True):
            f.write("%d\n" % item_id)
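A caveat on the original version of this example: RidgeClassifier implements decision_function but not predict_proba, which is why the corrected code above ranks items by decision values. If probability-like scores are wanted, one common workaround (an assumption here, not part of the original) is to squash the decision values with the logistic sigmoid:

from scipy.special import expit

# clf and testFeatures are the fitted model and test matrix from the example above.
scores = clf.decision_function(testFeatures)  # signed distances to the separating hyperplane
pseudo_proba = expit(scores)                  # monotone map into (0, 1); not calibrated probabilities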
Example 4: validate
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def validate(input_train, rows=True, test=0.25):
    """
    Takes a file as input and returns a classification report, average precision, and
    AUC for a bigram model. By default, loads all rows of a dataset, trains on 0.75,
    and tests on 0.25.
    ----
    input_train : full path of the file you are loading
    rows : True loads all rows; pass an int for a specific number of rows
    test : float proportion of the dataset used for testing
    """
    if rows is True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows=rows)
    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description, str)
    del data

    vect = text.CountVectorizer(decode_error='ignore', strip_accents='unicode', ngram_range=(1, 2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies, counts))
    features_train, features_test, target_train, target_test = train_test_split(features, response, test_size=test)

    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)
    return (classification_report(target_test, prediction),
            average_precision_score(target_test, prediction),
            roc_auc_score(target_test, prediction))
Example 5: Eval
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def Eval(XTrain, YTrain, XTest, YTest, clf, return_predicted_labels=False):
    """
    Inputs:
        XTrain - N by D matrix of training data vectors
        YTrain - N by 1 matrix of training class labels
        XTest - M by D matrix of testing data vectors
        YTest - M by 1 matrix of testing class labels
        clf - the classifier: either a string naming one
              ("ridge", "perceptron", "passive aggressive", "linsvm", "svm", "sgd")
              or an sklearn classifier instance with .fit and .predict methods
    Outputs:
        A tuple containing (in the following order):
            Accuracy
            Overall Precision
            Overall Recall
            Overall F1 score
            Avg. Precision per class
            Avg. Recall per class
            Avg. F1 score per class
            Precision per class
            Recall per class
            F1 score per class
            (if return_predicted_labels)
            predicted class labels for each row in XTest
    """
    if isinstance(clf, str):
        if 'ridge' in clf.lower():
            clf = RidgeClassifier(tol=1e-2, solver="lsqr")
        elif "perceptron" in clf.lower():
            clf = Perceptron(n_iter=50)  # n_iter is spelled max_iter in current scikit-learn
        elif "passive aggressive" in clf.lower() or 'passive-aggressive' in clf.lower():
            clf = PassiveAggressiveClassifier(n_iter=50)  # likewise max_iter today
        elif 'linsvm' in clf.lower() or 'linearsvm' in clf.lower() or 'linearsvc' in clf.lower():
            clf = LinearSVC()
        elif 'svm' in clf.lower() or 'svc' in clf.lower():
            clf = SVC()
        elif 'sgd' in clf.lower():
            clf = SGDClassifier()

    clf.fit(XTrain, YTrain)
    YPred = clf.predict(XTest)

    accuracy = sklearn.metrics.accuracy_score(YTest, YPred)
    (overall_precision, overall_recall, overall_f1, support) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred, average='micro')
    (precision_per_class, recall_per_class, f1_per_class, support_per_class) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred)
    avg_precision_per_class = np.mean(precision_per_class)
    avg_recall_per_class = np.mean(recall_per_class)
    avg_f1_per_class = np.mean(f1_per_class)
    del clf

    if return_predicted_labels:
        return (accuracy, overall_precision, overall_recall, overall_f1,
                avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
                precision_per_class, recall_per_class, f1_per_class, YPred)
    else:
        return (accuracy, overall_precision, overall_recall, overall_f1,
                avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
                precision_per_class, recall_per_class, f1_per_class)
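A hypothetical usage sketch for Eval with the string shortcut; the synthetic data and split are assumptions for illustration:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, n_features=10, random_state=0)
XTrain, XTest, YTrain, YTest = train_test_split(X, y, test_size=0.25, random_state=0)
# 'ridge' selects RidgeClassifier(tol=1e-2, solver="lsqr") inside Eval
acc, *rest = Eval(XTrain, YTrain, XTest, YTest, 'ridge')
print("accuracy: %.3f" % acc)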
Example 6: get_optimal_blend_weigth
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    clf = RidgeClassifier()
    X_test, y_test = exp_.get_test_data()
    clf.set_params(**best_param_)
    clf.fit(X_test, y_test)

    # dump2csv optimal linear weight
    names = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
    coefs = np.append(clf.intercept_, clf.coef_).astype(np.float64)
    optimal_linear_weight = pd.DataFrame(coefs.reshape(1, len(coefs)), columns=names)
    optimal_linear_weight.to_csv(os.path.join(Config.get_string('data.path'),
                                              folder,
                                              fname), index=False)

    # dump2cpkle for ridge model (cPickle is the Python 2 name; use pickle on Python 3)
    model_fname = os.path.join(Config.get_string('data.path'), folder, model_fname)
    with gzip.open(model_fname, 'wb') as gf:
        cPickle.dump(clf, gf, cPickle.HIGHEST_PROTOCOL)
    return True
Example 7: Predict
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def Predict():
    print('\nThere are %d new deals' % n_test)

    # Using the KNN classifier
    clf_KNN = KNeighborsClassifier(n_neighbors=3)  # KNN does not work well even if k has been tuned
    #clf_KNN = KNeighborsClassifier(n_neighbors=7)
    #clf_KNN = KNeighborsClassifier(n_neighbors=11)
    clf_KNN.fit(Corpus_train, Y_train)
    Y_pred_KNN = clf_KNN.predict(Corpus_test)
    print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

    # Using the SVM classifier
    clf_SVM = svm.SVC()
    clf_SVM.fit(Corpus_train, Y_train)
    Y_pred_SVM = clf_SVM.predict(Corpus_test)
    print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

    # Using the Ridge classifier
    clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
    #clf_RC = RidgeClassifier(tol=0.1, solver="lsqr")
    clf_RC.fit(Corpus_train, Y_train)
    Y_pred_RC = clf_RC.predict(Corpus_test)
    print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

    # Random Forests and Decision Trees are not considered because they perform
    # poorly on high-dimensional sparse data.
    # Using the Multinomial Naive Bayes classifier
    # I expect the MNB classifier to do best, since it is designed for occurrence-count features
    #clf_MNB = MultinomialNB(alpha=0.01)  # smoothing parameter 0.01 is worse than 0.1
    clf_MNB = MultinomialNB(alpha=0.1)
    #clf_MNB = MultinomialNB(alpha=0.3)  # a large smoothing rate does not benefit the model
    #clf_MNB = MultinomialNB(alpha=0.2)  # alpha = 0.2 or 0.05 can produce the best outcome
    clf_MNB.fit(Corpus_train, Y_train)
    Y_pred_MNB = clf_MNB.predict(Corpus_test)
    print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
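The comments above argue that multinomial naive Bayes suits occurrence-count features. A minimal, self-contained illustration of that pairing, with a toy corpus that is purely an assumption:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

docs = ["cheap deal today", "limited deal offer", "meeting agenda notes", "project status notes"]
labels = [1, 1, 0, 0]  # toy labels: 1 = deal, 0 = not a deal
counts = CountVectorizer().fit_transform(docs)       # raw token counts, MNB's natural input
clf_MNB = MultinomialNB(alpha=0.1).fit(counts, labels)  # alpha mirrors the example's smoothing
print(clf_MNB.predict(counts))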
Example 8: test_default_configuration_classify
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def test_default_configuration_classify(self):
    for i in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=False)
        configuration_space = ExtraTreesPreprocessor.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = ExtraTreesPreprocessor(random_state=1,
                                              **{hp_name: default[hp_name]
                                                 for hp_name in default})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.87310261080752882, places=2)
Example 9: test_default_configuration_classify
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def test_default_configuration_classify(self):
    for i in range(5):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=False)
        configuration_space = KernelPCA.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = KernelPCA(random_state=1,
                                 **{hp_name: default[hp_name] for hp_name in
                                    default if default[hp_name] is not None})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.096539162112932606)
Example 10: test_default_configuration_classify
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def test_default_configuration_classify(self):
    for i in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                       make_sparse=True)
        configuration_space = TruncatedSVD.get_hyperparameter_search_space()
        default = configuration_space.get_default_configuration()
        preprocessor = TruncatedSVD(random_state=1,
                                    **{hp_name: default[hp_name]
                                       for hp_name in default
                                       if default[hp_name] is not None})
        preprocessor.fit(X_train, Y_train)
        X_train_trans = preprocessor.transform(X_train)
        X_test_trans = preprocessor.transform(X_test)

        # fit a classifier on top
        classifier = RidgeClassifier()
        predictor = classifier.fit(X_train_trans, Y_train)
        predictions = predictor.predict(X_test_trans)
        accuracy = sklearn.metrics.accuracy_score(predictions, Y_test)
        self.assertAlmostEqual(accuracy, 0.44201578627808136, places=2)
Example 11: len
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
data = [i for i in csv.reader(open(train_file, 'r'))]
data = data[1:]  # remove header
random.shuffle(data)

X = np.array([i[1:] for i in data]).astype(float)
Y = np.array([i[0] for i in data]).astype(int)

train_cutoff = len(data) * 3 // 4
X_train = X[:train_cutoff]
Y_train = Y[:train_cutoff]
X_test = X[train_cutoff:]
Y_test = Y[train_cutoff:]

# Note: the normalize parameter was removed from recent scikit-learn releases
classifier = RidgeClassifier(normalize=True, alpha=1)
classifier = classifier.fit(X_train, Y_train)
print('Training error : %s' % classifier.score(X_train, Y_train))

Y_predict = classifier.predict(X_test)
equal = 0
for i in range(len(Y_predict)):
    if Y_predict[i] == Y_test[i]:
        equal += 1
print('Accuracy = %s' % (float(equal) / len(Y_predict)))
Example 12: print
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
remove = ()
X_train = cityName
print('Creating the vectorizer and choosing a transform (from raw text to feature)')
vect = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
#vect = CountVectorizer(min_n=1, max_n=2, max_features=1000)
X_train = vect.fit_transform(X_train)

cityClass = RidgeClassifier(tol=1e-7)
countryClass = RidgeClassifier(tol=1e-7)
print('Creating a classifier for cities')
cityClass.fit(X_train, cityCode)
print('Creating a classifier for countries')
countryClass.fit(X_train, countryCode)

print('testing the performance')
testCityNames = vect.transform(cityNameTest)
# note: the original swapped the two classifiers on the next two lines
predictionsCity = cityClass.predict(testCityNames)
predictionsCountry = countryClass.predict(testCityNames)

with open('predictions.csv', 'w') as csvfile:
    writer = csv.writer(csvfile)
    #for ind in range(0, len(predictionsCountry)):
    #    writer.writerow([str(predictionsCountry[ind]), str(predictionsCity[ind])])
    for predCountry, predCity in zip(predictionsCountry, predictionsCity):
        writer.writerow([str(predCountry), str(predCity)])
Example 13: time
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
X_test = X_test_summary + X_test_title + X_test_author
duration = time() - t0
print("n_samples: %d, n_features: %d" % X_test.shape)
print("Done in %fs" % (duration))

def writeToDisk(predn, clfname):
    target = "./" + clfname + ".txt"
    target = open(target, 'w')
    target.write("{}\t{}\n".format("record_id", "topic"))
    for x in zip(testID, predn):
        target.write("{}\t{}\n".format(x[0], x[1]))
    target.close()
    print(clfname, " output written to disk.")

clf1 = RidgeClassifier(tol=1e-2, solver="lsqr")  # Ridge classifier
clf1.fit(X_train, y_train)
pred = clf1.predict(X_test)
writeToDisk(pred, "RidgeClassifier")

clf2 = MultinomialNB(alpha=.01)  # Naive Bayes classifier
clf2.fit(X_train, y_train)
pred = clf2.predict(X_test)
writeToDisk(pred, "MultinomialNB")

clf3 = BernoulliNB(alpha=.01)  # Naive Bayes (Bernoulli) classifier
clf3.fit(X_train, y_train)
pred = clf3.predict(X_test)
writeToDisk(pred, "BernoulliNB")

clf4 = KNeighborsClassifier(n_neighbors=10)  # KNeighbors classifier
clf4.fit(X_train, y_train)
pred = clf4.predict(X_test)
writeToDisk(pred, "KNeighborsClassifier")
Example 14: get_ridge_plot
# Required import: from sklearn.linear_model import RidgeClassifier [as alias]
# Or: from sklearn.linear_model.RidgeClassifier import fit [as alias]
def get_ridge_plot(best_param_, experiment_,
                   param_keys_, param_vals_,
                   png_folder,
                   png_fname,
                   score_threshold=0.8):
    # 1. refit along the alpha grid
    parameters = dict(zip(param_keys_, param_vals_))
    del parameters['model_type']

    clf = RidgeClassifier()
    X_train, y_train = experiment_.get_train_data()
    clf.set_params(**best_param_)
    clf.fit(X_train, y_train)
    best_alpha = best_param_['alpha']
    result = {'alphas': [],
              'coefs': np.zeros((len(parameters['alpha']), len(X_train.columns.values) + 1)),
              'scores': [],
              'score': None}

    for i, alpha in enumerate(parameters.get('alpha', None)):
        result['alphas'].append(alpha)
        del best_param_['alpha']
        best_param_['alpha'] = alpha
        clf.set_params(**best_param_)
        clf.fit(X_train, y_train)

        # regularization path
        tmp = np.zeros(len(X_train.columns.values) + 1, dtype=np.float32)
        if best_param_['fit_intercept']:
            tmp = np.append(clf.intercept_, clf.coef_)
        else:
            tmp[1:] = clf.coef_  # the original assigned clf.intercept_ here, which looks like a bug
        result['coefs'][i, :] = tmp
        result['scores'].append(experiment_.get_proba(clf, X_train))
    del X_train, y_train

    # 2. map coefficient indices back to feature names
    tmp_len = len(experiment_.get_data_col_name())
    index2feature = dict(zip(np.arange(1, tmp_len + 1),
                             experiment_.get_data_col_name()))
    if best_param_['fit_intercept']:
        index2feature[0] = 'intercept'

    # 3. plot
    gs = GridSpec(2, 2)
    ax1 = plt.subplot(gs[:, 0])
    ax2 = plt.subplot(gs[0, 1])
    ax3 = plt.subplot(gs[1, 1])

    # 3.1 feature importance
    labels = np.append(np.array(['intercept'], dtype='S100'), experiment_.get_data_col_name())
    nrows, ncols = result['coefs'].shape
    for ncol in range(ncols):
        ax1.plot(np.array(result['alphas']), result['coefs'][:, ncol], label=labels[ncol])
    ax1.legend(loc='best')
    ax1.set_xscale('log')
    ax1.set_title("Regularization Path:%1.3e" % (best_alpha))
    ax1.set_xlabel("alpha", fontsize=10)

    # 3.2 PDF
    X_test, y_test = experiment_.get_test_data()
    result['score'] = clf.decision_function(X_test)
    sns.distplot(result['score'], kde=False, rug=False, ax=ax2)
    ax2.set_title("PDF : Decision_Function")

    # 3.3 CDF
    num_bins = 100
    try:
        # np.histogram's normed argument was removed; density is the replacement
        counts, bin_edges = np.histogram(result['score'], bins=num_bins, density=True)
    except Exception:
        counts, bin_edges = np.histogram(result['score'], density=True)
    cdf = np.cumsum(counts)
    ax3.plot(bin_edges[1:], cdf / cdf.max())
    ax3.set_title("CDF")
    ax3.set_xlabel("Decision_Function:Confidence_Score", fontsize=10)

    png_fname = os.path.join(Config.get_string('data.path'), png_folder, png_fname)
    plt.tight_layout()
    plt.savefig(png_fname)
    plt.close()
    return True