本文整理汇总了Python中sklearn.linear_model.SGDClassifier.decision_function方法的典型用法代码示例。如果您正苦于以下问题:Python SGDClassifier.decision_function方法的具体用法?Python SGDClassifier.decision_function怎么用?Python SGDClassifier.decision_function使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.SGDClassifier
的用法示例。
在下文中一共展示了SGDClassifier.decision_function方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: SGDGridSearch_OLD
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def SGDGridSearch_OLD():
    """Grid-search the SGD L1 regularization strength over a log scale.

    For each candidate alpha an L1-penalized SGDClassifier is fit on the
    module-level (f_train, y_train) split and scored by ROC AUC on
    (f_test, y_test).  The ROC curve of the best model is plotted.

    Returns:
        (clf, aucs): the classifier refit with the best alpha, and the
        list of AUC scores, one per candidate.
    """
    # C=1 is best
    # Candidate regularization strengths: 10^-9 .. 10^8.
    cs = 10.0**np.arange(-9, 9, 1)
    aucs = []
    for c in cs:
        clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
        probs = clf.decision_function(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
        roc_auc = auc(fpr, tpr)
        cstr = '%0.2e' % c
        myplt = st.plotROC(fpr, tpr, roc_auc,
                           figure=False,
                           show=False,
                           returnplt=True,
                           showlegend=False,
                           title='Grid Search - SGD Classifier ROC Curve')
        aucs.append(roc_auc)
    # Pick the index of the best-scoring candidate.
    best = 0
    for i in range(len(cs)):
        if aucs[i] > aucs[best]:
            best = i
    c = cs[best]
    # Refit with the winning alpha and plot its ROC curve.
    clf = SGDClassifier(penalty='l1', alpha=c).fit(f_train, y_train)
    probs = clf.decision_function(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs)
    # BUG FIX: recompute the AUC for the refit model; the original passed
    # the stale `roc_auc` left over from the last grid-search iteration.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best C = %0.2e' % c,
                       figure=False,
                       show=False,
                       returnplt=True,
                       showlegend=True,
                       title='Grid Search - SGD Classifier ROC Curve')
    myplt.show()
    return clf, aucs
示例2: train_kaggle
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def train_kaggle(dataset, alg="rig", data="bow"):
train_x, train_y, test_x = dataset
print "shape for training data is", train_x.shape
if alg == "svm":
clf = SGDClassifier(verbose=1, n_jobs=2, n_iter=20)
elif alg == "svm_sq":
clf = SGDClassifier(verbose=1, n_jobs=2, n_iter=20, loss="squared_hinge")
elif alg == "log":
clf = LogisticRegression(verbose=1, n_jobs=2)
elif alg == "per":
clf = Perceptron(verbose=1, n_jobs=2, n_iter=25)
elif alg == "rig":
clf = RidgeClassifier()
elif alg == "pa":
clf = PassiveAggressiveClassifier(n_jobs=2, n_iter=25)
else:
raise NotImplementedError
print "training with %s..." % alg
clf.fit(train_x, train_y)
# clf.fit(validate_x, validate_y)
predicted = clf.predict(test_x)
save_csv(predicted, fname=alg + "_" + data)
if alg != "nb":
return clf.decision_function(train_x), clf.decision_function(test_x)
else:
return clf.predict_proba(train_x), clf.predict_proba(test_x)
示例3: plot_sgd_separator
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def plot_sgd_separator():
    """Fit a linear SVM (hinge-loss SGD) on two separable blobs and draw
    the decision boundary together with the +/-1 margin lines."""
    # we create 50 separable points
    X, Y = make_blobs(n_samples=50, centers=2,
                      random_state=0, cluster_std=0.60)

    # fit the model
    clf = SGDClassifier(loss="hinge", alpha=0.01,
                        n_iter=200, fit_intercept=True)
    clf.fit(X, Y)

    # plot the line, the points, and the nearest vectors to the plane
    xx = np.linspace(-1, 5, 10)
    yy = np.linspace(-1, 5, 10)
    X1, X2 = np.meshgrid(xx, yy)
    # BUG FIX: decision_function expects a 2-D (n_samples, n_features)
    # array; the original passed a single 1-D point per call, which modern
    # scikit-learn rejects.  Score the whole grid in one vectorized call.
    Z = clf.decision_function(np.c_[X1.ravel(), X2.ravel()]).reshape(X1.shape)

    levels = [-1.0, 0.0, 1.0]
    linestyles = ['dashed', 'solid', 'dashed']
    colors = 'k'
    ax = plt.axes()
    ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles)
    ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
    ax.axis('tight')
示例4: kernelsvm
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
class kernelsvm():
    """Approximate RBF-kernel SVM: a Nystroem feature map followed by a
    linear SGD classifier trained on the mapped features."""

    def __init__(self, theta0, alpha, loss_metric):
        # theta0: RBF gamma; alpha: SGD regularization; loss_metric: SGD loss.
        self.theta0 = theta0
        self.alpha = alpha
        self.loss_metric = loss_metric

    def fit(self, X, y, idx_SR):
        """Build the Nystroem map from the rows indexed by idx_SR, then fit
        the SGD classifier on the transformed features."""
        n_SR = len(idx_SR)
        self.feature_map_nystroem = General_Nystroem(
            kernel='rbf', gamma=self.theta0, n_components=n_SR)
        X_features = self.feature_map_nystroem.fit_transform(X, idx_SR)
        print("fitting SGD")
        self.clf = SGDClassifier(loss=self.loss_metric, alpha=self.alpha)
        self.clf.fit(X_features, y)
        print("fitting SGD finished")

    def predict(self, X):
        """Return (predictions, transformed features) for raw input X."""
        print("Predicting")
        X_transform = self.feature_map_nystroem.transform(X)
        return self.clf.predict(X_transform), X_transform

    def decision_function(self, X):
        # X should be the transformed input!
        return self.clf.decision_function(X)

    def err_rate(self, y_true, y_pred):
        """Classification error rate, i.e. 1 - accuracy."""
        return 1.0 - accuracy_score(y_true, y_pred)

    def get_params(self):
        return self.clf.get_params()
示例5: main
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def main(feature_pkl):
    # Train an elastic-net logistic SGD model on pickled features, rank the
    # test items by decision score, write the ranking to CSV, and print the
    # strongest positive/negative coefficients as word-cloud input.
    print 'Loading data...'
    featureIndex, trainFeatures, trainTargets, trainItemIds, testFeatures, testItemIds = joblib.load(feature_pkl)
    print 'Normalizing data...'
    # Column-wise (axis=0) L2 normalization so every feature has unit norm.
    trainFeatures = sklearn.preprocessing.normalize(trainFeatures.tocsc(), norm='l2', axis=0)
    testFeatures = sklearn.preprocessing.normalize(testFeatures.tocsc(), norm='l2', axis=0)
    #trainSplit, testSplit = splitTuple
    # Best estimator from grid search:
    clf = SGDClassifier(alpha=3.16227766017e-08, class_weight='auto', epsilon=0.1,
                        eta0=0.0, fit_intercept=True, l1_ratio=0.15,
                        learning_rate='optimal', loss='log', n_iter=5, n_jobs=1,
                        penalty='elasticnet', power_t=0.5, random_state=None, shuffle=False,
                        verbose=0, warm_start=False)
    print 'Fitting model...'
    clf.fit(trainFeatures,trainTargets)
    # Use probabilities or decision function to generate a ranking
    predicted_scores = clf.decision_function(testFeatures)
    with open(os.path.splitext(feature_pkl)[0]+'_testRanking.csv', 'w') as f:
        f.write('id\n')
        # Items sorted by descending score; only the ids are written, so the
        # row order itself encodes the ranking.
        for pred_score, item_id in sorted(zip(predicted_scores, testItemIds), reverse = True):
            f.write('%d\n' % (item_id))
    # Turn estimator params into word clouds
    # Features reordered to align with coefficient indices.
    features, indices = zip(*sorted(featureIndex.iteritems(), key=operator.itemgetter(1)))
    coef_tuple = zip(clf.coef_[0],indices)
    coef_sort = sorted(coef_tuple, reverse=True)
    print 'Top 20 for illicit:'
    wordle_print(coef_sort[:20],features)
    print 'Top 20 for licit:'
    wordle_print(coef_sort[-20:],features)
示例6: plot_sgd_classifier
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def plot_sgd_classifier(num_samples, clt_std):
    """Fit a logistic-loss SGD classifier on two synthetic blobs and plot
    the decision boundary with its +/-1 confidence levels.

    :param num_samples: number of points passed to make_blobs
    :param clt_std: cluster standard deviation for make_blobs
    """
    # generation of data
    X, y = make_blobs(n_samples=num_samples, centers=2, cluster_std=clt_std)

    # fitting of data using logistic regression
    clf = SGDClassifier(loss='log', alpha=0.01)
    clf.fit(X, y)

    # plotting of data
    x_ = np.linspace(min(X[:, 0]), max(X[:, 0]), 10)
    y_ = np.linspace(min(X[:, 1]), max(X[:, 1]), 10)
    X_, Y_ = np.meshgrid(x_, y_)
    # BUG FIX: decision_function requires a 2-D (n_samples, n_features)
    # input; the original fed one 1-D point per grid cell, which modern
    # scikit-learn rejects.  Score the whole grid in a single call.
    Z = clf.decision_function(np.c_[X_.ravel(), Y_.ravel()]).reshape(X_.shape)

    levels = [-1.0, 0, 1.0]
    colors = 'k'
    linestyles = ['dashed', 'solid', 'dashed']
    ax = plt.axes()
    plt.xlabel('X1')
    plt.ylabel('X2')
    # NOTE(review): `labels` is not a documented Axes.contour kwarg —
    # confirm it is accepted by the matplotlib version in use.
    ax.contour(X_, Y_, Z, colors=colors,
               levels=levels, linestyles=linestyles, labels='Boundary')
    ax.scatter(X[:, 0], X[:, 1], c=y)
示例7: sgd_classify
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def sgd_classify(self):
print "Stochastic Gradient Descent"
clf = SGDClassifier()
clf.fit(self.descr, self.target)
mean = clf.score(self.test_descr, self.test_target)
print "Mean : %3f" % mean
print "Probability ", clf.coef_
print "Mean of each feature per class ", clf.intercept_
print "Confidence Score ",clf.decision_function(self.descr)
print "Predict Probability ", clf.predict_proba(self.descr)
print "Transform ", clf.transform(self.descr)
示例8: run
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def run():
x_train,y_train,x_test = load_data()
X_train,Y_train,X_test,Y_test = split_data(x_train,y_train)
best_score_cv = 0
best_algo = ''
clf = SGDClassifier(loss="hinge", penalty="l2")
clf.fit(X_train,Y_train)
Y_pred = clf.decision_function(X_test)
if best_score_cv<metric(Y_test,Y_pred):
best_score_cv = metric(Y_test,Y_pred)
best_algo = 'hinge + l2'
for alpha in [0.0001,0.001, 0.01, 0.1]:
clf= Lasso(alpha=alpha)
clf.fit(X_train,Y_train)
Y_pred = clf.decision_function(X_test)
if best_score_cv<metric(Y_test,Y_pred):
best_score_cv = metric(Y_test,Y_pred)
best_algo = 'LASSO with alpha='+str(alpha)
clf = RandomForestClassifier(n_estimators=1000, max_depth=None, min_samples_split=1, random_state=0)
clf.fit(X_train,Y_train)
Y_pred = clf.predict_proba(X_test)
if best_score_cv<metric(Y_test,Y_pred[:,1]):
best_score_cv = metric(Y_test,Y_pred[:,1])
best_algo = 'randomforest with 100 trees'
print
print 'Thank you for running ML21 futurist meta-algorithm'
print
print '> the best algorithm is : '+best_algo
print
print '> the best cross-validation score is : '+str(best_score_cv)
print
print 'If you want, I can also do your breakfast.'
print
示例9: train
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def train(input_filename, num_train_examples, num_test_examples, block_size):
    """Stream-train an SGD classifier on random-forest leaf embeddings.

    An initial chunk fits a RandomForest + OneHotEncoder feature pipeline;
    the CSV is then re-read in blocks for several passes, calling
    partial_fit and recording the test AUC after every block.

    :return: pandas DataFrame of AUC values indexed by cumulative iterations.
    """
    # Load initial training data and test data
    X_train, y_train, X_test, y_test, scaler = loaddata(input_filename, num_test_examples, block_size)

    # Feature generation using random forests: each sample is represented
    # by the one-hot-encoded ids of the leaves it lands in.
    forest = RandomForestClassifier(n_estimators=150, n_jobs=-1)
    forest.fit(X_train, y_train)
    encoder = OneHotEncoder()
    encoder.fit(forest.apply(X_train))
    X_test = encoder.transform(forest.apply(X_test))

    # Make sure that classes are weighted inversely to their frequencies
    weights = float(y_train.shape[0]) / (2 * numpy.bincount(y_train))
    class_weights = {0: weights[0], 1: weights[1]}

    learner = SGDClassifier(
        loss="hinge",
        penalty="l2",
        learning_rate="invscaling",
        alpha=0.0001,
        average=10 ** 4,
        eta0=1.0,
        class_weight=class_weights,
    )

    num_passes = 3
    aucs = []
    for j in range(num_passes):
        for i in range(0, num_train_examples, block_size):
            df = pandas.read_csv(input_filename, header=None, skiprows=i, nrows=block_size)
            X_train = df.values[:, 1:]
            X_train = scaler.transform(X_train)
            X_train = encoder.transform(forest.apply(X_train))
            # BUG FIX: the numpy.int alias was deprecated and removed
            # (NumPy >= 1.24); use the builtin int it aliased.
            y_train = numpy.array(df.values[:, 0], int)
            del df  # release the chunk before the next read
            learner.partial_fit(X_train, y_train, classes=numpy.array([0, 1]))
            y_pred_prob = learner.decision_function(X_test)
            auc = roc_auc_score(y_test, y_pred_prob)
            aucs.append([i + num_train_examples * j, auc])
            print(aucs[-1])
    df = pandas.DataFrame(aucs, columns=["Iterations", "AUC"])
    df = df.set_index("Iterations")
    return df
示例10: train_custom_one_vs_all
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
def train_custom_one_vs_all(X_train,X_test,Y_train,topk):
#convert matrix to row for efficient splicing
Y_train = Y_train.tocsc()
tag_classifiers = []
num_training,numclasses = Y_train.shape
num_test_examples = X_test.shape[0]
# hold a vector mxk, containing top k prediction classes for each example, maintain m heaps for that
num_examples = X_test.shape[0]
num_classes = len(tag_classifiers)
topk_class_distances = []
for i in xrange(num_examples):
heap = []
topk_class_distances += [heap]
for j in xrange(numclasses):
# train on each class label for all the training examples
y = numpy.ravel(Y_train.getcol(j).todense());
clf = SGDClassifier(loss='hinge',penalty='l2',alpha=0.0001,fit_intercept=True,n_iter = 10,shuffle=True,n_jobs=4,learning_rate='optimal')
clf.fit(X_train,y);
print "Trained for class",j
# get the decision for all test examples
decision = clf.decision_function(X_test)
# for each test example add its decision value to the heap of top k decision values
for i in xrange(num_test_examples):
h = topk_class_distances[i]
if len(h) < topk: heapq.heappush(h,(decision[i],j))
else: heapq.heappushpop(h,(decision[i],j))
print "Predicted for class",j
#clean the decision values and store the class labels
class_label_indices = []
for i in xrange(num_examples):
topk_labels = [label for dist,label in topk_class_distances[i]]
class_label_indices += [topk_labels]
return class_label_indices
示例11: SGDC
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
class SGDC(object):
    """Text classifier: NLPDict feature vectors fed to a hinge-loss,
    L1-penalized SGD model; decision scores are squashed into [0, 1]."""

    def __init__(self, texts, classes, nlpdict):
        # TODO: add list of smileys to texts/classes
        self.s = SGDClassifier(loss="hinge", penalty="l1", shuffle=True, class_weight="auto")
        self.dictionary = nlpdict if nlpdict else NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        # Fit directly on the vectorized texts.
        self.s.fit(self.dictionary.feature_vectors(texts), classes)

    def classify(self, texts):
        """Return scores in [0, 1]: raw decision values rescaled by
        /20 + 0.5, then clipped at both ends."""
        scores = self.s.decision_function(self.dictionary.feature_vectors(texts))
        scores = scores / 20 + 0.5
        scores[scores > 1] = 1
        scores[scores < 0] = 0
        return scores
示例12: LearningModel
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
class LearningModel(object):
    """
    Trainable model used to predict keywords for unseen data.

    Bundles a StandardScaler and an SGDClassifier together with a global
    frequency index and a word2vec model.
    """

    def __init__(self, global_index, word2vec_model):
        self.scaler = StandardScaler()
        self.classifier = SGDClassifier(n_jobs=-1)  # try loss log (logistic reg)
        self.global_index = global_index
        self.word2vec = word2vec_model

    def maybe_fit_and_scale(self, matrix):
        """
        Scale `matrix`, first fitting the scaler if it was never fit.
        A ValueError is raised when the batch is too small for fitting.

        :param matrix: matrix to be transformed
        :return: scaled matrix
        """
        if not hasattr(self.scaler, 'n_samples_seen_'):
            # Guard clause: refuse to fit the scaler on a tiny batch.
            if len(matrix) < 1000:
                raise ValueError("Please user bigger batch size. "
                                 "The feature matrix is too small "
                                 "to fit the scaler.")
            self.scaler.fit(matrix)
        return self.scaler.transform(matrix)

    def partial_fit_classifier(self, input_matrix, output_vector):
        """
        Incrementally fit the classifier on X, y (online training).

        :param input_matrix: feature matrix
        :param output_vector: vector of the same length as input_matrix
        :return: None
        """
        classes = np.array([0, 1], dtype=np.bool_)
        # TODO Maybe initialize the classifier with this for balancing classes
        # weights = compute_class_weight('balanced', classes, output_vector)
        self.classifier = self.classifier.partial_fit(
            input_matrix, output_vector, classes=classes)

    def fit_classifier(self, input_matrix, output_vector):
        """
        Fit the classifier on X, y from scratch; any previous fit is lost.

        :param input_matrix: feature matrix
        :param output_vector: vector of the same length as input_matrix
        :return: None
        """
        self.classifier = self.classifier.fit(input_matrix, output_vector)

    def scale_and_predict(self, input_matrix):
        """
        Scale the samples and return class predictions for each of them.

        :param input_matrix: a feature matrix
        :return: matrix with predictions for each sample
        """
        return self.classifier.predict(self.scaler.transform(input_matrix))

    def scale_and_predict_confidence(self, input_matrix):
        """
        Scale the samples and return decision-function confidence values.

        :param input_matrix: a feature matrix
        :return: matrix with confidence values for each sample
        """
        return self.classifier.decision_function(self.scaler.transform(input_matrix))

    def get_global_index(self):
        """ Get the GlobalFrequencyIndex field. """
        return self.global_index
示例13: cross_val_predict
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
# cv=3,
# scoring="accuracy"))
# Cross-validated label predictions for the binary "is it a 5?" task.
y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)
# print(y_train_pred)
# print(y_train_5)
# print(confusion_matrix(y_train_5, y_train_pred))
# print("precision:\n",precision_score(y_train_5, y_train_pred))
# print("recall:\n",recall_score(y_train_5, y_train_pred))
# print("f1:\n", f1_score(y_train_5, y_train_pred))
# Raw decision score for a single sample; thresholding it by hand below
# illustrates the precision/recall trade-off.
y_scores = sgd_clf.decision_function([some_digit])
# print(y_scores)
threshold = 0
y_some_digit_pred = (y_scores > threshold)
# print(y_some_digit_pred)
# A much higher threshold can flip the prediction for the same score.
threshold = 200000
y_some_digit_pred = (y_scores > threshold)
# print(y_some_digit_pred)
# Decision scores (not labels) for every training sample, needed to trace
# the full precision/recall curve.
y_scores = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3, method="decision_function")
precisions, recalls, thresholds = precision_recall_curve(y_train_5, y_scores)
示例14: SGDClassifier
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
from sklearn.linear_model import SGDClassifier

# Elastic-net logistic regression (log loss).
enetloglike = SGDClassifier(loss="log", penalty="elasticnet",
                            alpha=0.0001, l1_ratio=0.15, class_weight='balanced')
enetloglike.fit(X, y)

# Elastic-net linear SVM (hinge loss) on the same data.
enethinge = SGDClassifier(loss="hinge", penalty="elasticnet",
                          alpha=0.0001, l1_ratio=0.15, class_weight='balanced')
# BUG FIX: the original refit `enetloglike` a second time here (copy-paste
# slip); only `enethinge` needs fitting at this point.
enethinge.fit(X, y)

print(np.corrcoef(enetloglike.coef_, enethinge.coef_))
# The weights vectors are highly correlated

print(np.corrcoef(enetloglike.decision_function(X), enethinge.decision_function(X)))
# The decision function are highly correlated
plt.plot(enetloglike.decision_function(X), enethinge.decision_function(X), "o")
'''
## Exercise
Compare predictions of Enet Logistic regression (LR) and Hinge Enet
- Compute the correlation between pairs of weights vectors.
- Compare the predictions of two classifiers using their decision function:
* Compute the correlation decision function.
* Plot the pairwise decision function of the classifiers.
示例15: print
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import decision_function [as 别名]
# Label-encode every object (string) column, fitting on the union of train
# and test values so both frames share one consistent encoding.
for f in train.columns:
    if train[f].dtype=='object':
        print(f)
        lbl = preprocessing.LabelEncoder()
        lbl.fit(list(train[f].values) + list(test[f].values))
        train[f] = lbl.transform(list(train[f].values))
        test[f] = lbl.transform(list(test[f].values))

# Every column except the target is used as a feature.
features = [s for s in train.columns.ravel().tolist() if s != 'QuoteConversion_Flag']
print("Features: ", features)
print("Train a SGDClassifier model")
# Hold out 1% of the rows for validation.
X_train, X_valid = train_test_split(train, test_size=0.01)
y_train = X_train['QuoteConversion_Flag']
y_valid = X_valid['QuoteConversion_Flag']
clf = SGDClassifier(loss="hinge", penalty="l2", n_jobs=-1)
clf.fit(X_train[features].values, y_train.values)
print("## Validating Data")
# decision_function scores are sufficient for ROC AUC (no probabilities
# available with hinge loss).
preds = clf.decision_function(X_valid[features])
auc_value = roc_auc_score(y_valid, preds)
print("ROC Score : " + str(auc_value))
print("## Predicting test data")
preds = clf.decision_function(test[features].values)
test["QuoteConversion_Flag"] = preds
test[['QuoteNumber',"QuoteConversion_Flag"]].to_csv('test_predictions.csv', index=False)