本文整理汇总了Python中sklearn.linear_model.SGDClassifier类的典型用法代码示例。如果您正苦于以下问题：Python SGDClassifier类的具体用法？Python SGDClassifier怎么用？Python SGDClassifier使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SGDClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sgd_classifier
def sgd_classifier(V_train, y_train, V_val, y_val, V_test, y_test):
    """Fit an SGD linear classifier and print validation/test quality.

    Trains ``SGDClassifier`` on the training split, then prints the
    fitting time, the accuracy on the validation split and a
    classification report for the test split.  Nothing is returned.

    Parameters
    ----------
    V_train, V_val, V_test
        Feature matrices for the train / validation / test splits.
    y_train, y_val, y_test
        Labels matching the feature matrices above.
    """
    # Every print below takes one pre-formatted argument so the output is
    # the same under the Python 2 statement and the Python 3 function.
    t0 = time.time()
    print('Building SGD classifier model')
    # NOTE(review): later scikit-learn releases replaced ``n_iter`` with
    # ``max_iter`` -- confirm the installed version still accepts ``n_iter``.
    clf = SGDClassifier(n_iter=50)
    clf.fit(V_train, y_train)
    t1 = time.time()
    # Elapsed seconds, truncated to two decimals.
    elapsed = int((t1 - t0) * 100) / 100.
    print('Building SGD classifier model ... Done %s' % elapsed)
    print('')

    p_val = clf.predict(V_val)
    print('Accuracy on validation set %s' % accuracy_score(y_val, p_val))

    p_test = clf.predict(V_test)
    print('Accuracy on testing set')
    print(classification_report(y_test, p_test))
示例2: run_online_classifier
def run_online_classifier():
    """Train a logistic SGD classifier incrementally on a streamed CSV.

    Up to 45 mini-batches of 1000 documents are pulled from
    ``datasets/movie_data.csv``, hashed into features and fed to
    ``partial_fit``.  Training stops early when ``get_minibatch`` returns
    ``None`` for the documents.  A further batch of 5000 documents is then
    used to print the test accuracy and is finally folded into the model.
    """
    hasher = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    model = SGDClassifier(loss='log', random_state=1, n_iter=1)
    reviews = stream_docs(path=os.path.join('datasets', 'movie_data.csv'))
    # partial_fit needs the full label set up front.
    label_set = np.array([0, 1])

    for _ in range(45):
        docs, targets = get_minibatch(reviews, size=1000)
        if docs is None:
            break
        model.partial_fit(hasher.transform(docs), targets, classes=label_set)

    held_docs, held_targets = get_minibatch(reviews, size=5000)
    held_features = hasher.transform(held_docs)
    print("Test accuracy: %.3f" % model.score(held_features, held_targets))
    # After scoring, the held-out batch is also used to update the model.
    model.partial_fit(held_features, held_targets)
示例3: test_underflow_or_overlow
def test_underflow_or_overlow():
    """Check SGDClassifier on hugely unscaled features.

    The model must fit cleanly on min-max scaled data and raise a
    ``ValueError`` with an under-/overflow message on the raw data.
    All floating-point warnings are promoted to errors for the duration.
    """
    with np.errstate(all="raise"):
        rng = np.random.RandomState(0)
        n_samples, n_features = 100, 10

        # Blow up the first two columns while keeping every value finite.
        X = rng.normal(size=(n_samples, n_features))
        X[:, :2] *= 1e300
        assert_true(np.isfinite(X).all())

        # Min-max scaling avoids the numerical instability that a naive
        # standard-deviation computation would hit on this data.
        X_scaled = MinMaxScaler().fit_transform(X)
        assert_true(np.isfinite(X_scaled).all())

        # Ground truth is defined on the scaled data.
        true_weights = rng.normal(size=n_features)
        scores = np.dot(X_scaled, true_weights)
        y = (scores > 0.0).astype(np.int32)
        assert_array_equal(np.unique(y), [0, 1])

        clf = SGDClassifier(alpha=0.1, loss="squared_hinge", n_iter=500)

        # Smoke test: fitting on scaled data gives finite coefficients.
        clf.fit(X_scaled, y)
        assert_true(np.isfinite(clf.coef_).all())

        # Fitting on the raw data must be reported as numerically unstable.
        expected_msg = (
            r"Floating-point under-/overflow occurred at epoch #.*"
            r" Scaling input data with StandardScaler or MinMaxScaler"
            r" might help."
        )
        assert_raises_regexp(ValueError, expected_msg, clf.fit, X, y)
示例4: __init__
class LightModel:
    """Small wrapper around a logistic ``SGDClassifier`` fed batch by batch."""

    def __init__(self,learningRate, numEpochs, ppenalty="l1", mustShuffle=True):
        """Create the underlying classifier.

        Note that ``learningRate`` is forwarded as the classifier's
        ``alpha`` argument and ``numEpochs`` as ``n_iter``.
        """
        self.Classifier = SGDClassifier(
            penalty=ppenalty,
            loss='log',
            alpha=learningRate,
            n_iter=numEpochs,
            shuffle=mustShuffle,
        )

    def train(self, gen, v=False):
        """Call ``partial_fit`` on every ``(x, y)`` batch yielded by ``gen``.

        With ``v`` set, a timestamped running example count is printed
        after each batch.
        """
        seen = 0
        for x, y in gen:
            # Classes are fixed to the binary labels 0 and 1.
            self.Classifier.partial_fit(x, y, [0, 1])
            seen += len(x)
            if v:
                print(str(datetime.now())[:-7] , "example:", seen)

    def test(self, gen, v=False):
        """Predict class-1 probabilities for every batch yielded by ``gen``.

        Returns ``(ytot, ptot)``: all targets and all predicted
        probabilities, concatenated in batch order.  Both are empty
        arrays when ``gen`` yields nothing.
        """
        # Collect per-batch pieces and stack once at the end; stacking
        # inside the loop would re-copy everything gathered so far on
        # every batch.  The leading empty float array keeps the
        # empty-generator result (and dtype promotion) unchanged.
        targets = [np.array([])]
        probabilities = [np.array([])]
        seen = 0
        for x, y in gen:
            proba = self.Classifier.predict_proba(x)
            # Column 1 holds the probability of class 1.
            probabilities.append(proba[:, 1])
            targets.append(y)
            seen += y.shape[0]
            if v:
                print(str(datetime.now())[:-7] , "example:", seen)
        ytot = np.hstack(targets)
        ptot = np.hstack(probabilities)
        if v:
            print("Score:", self.score(ytot, ptot))
        return (ytot, ptot)

    def score(self, target, prediction):
        """Return ``llfun(target, prediction)``."""
        return llfun(target, prediction)
示例5: validate
def validate():
    """
    Runs a 10-fold cross validation on the classifier, reporting
    accuracy.

    Reads ``../NewData/train.csv``, fits a fresh logistic-loss
    ``SGDClassifier`` on each training fold, and prints the accuracy of
    every fold followed by the average over all folds.
    """
    trainDf = pd.read_csv("../NewData/train.csv")
    X = np.matrix(pd.DataFrame(trainDf, index=None,
        columns=["invited", "user_reco", "evt_p_reco", "evt_c_reco",
                 "user_pop", "frnd_infl", "evt_pop"]))
    y = np.array(trainDf.interested)
    nrows = len(trainDf)
    kfold = KFold(nrows, 10)
    avgAccuracy = 0.0
    run = 0
    for train, test in kfold:
        Xtrain, Xtest, ytrain, ytest = X[train], X[test], y[train], y[test]
        clf = SGDClassifier(loss="log", penalty="l2")
        clf.fit(Xtrain, ytrain)
        # Score the whole fold in one call instead of one row at a time.
        hits = np.sum(np.asarray(clf.predict(Xtest)) == ytest)
        # float() forces true division: under Python 2 an int / int
        # division truncated every non-perfect fold accuracy to 0.
        accuracy = float(hits) / len(ytest)
        print("accuracy (run %d): %f" % (run, accuracy))
        avgAccuracy += accuracy
        run += 1
    print("Average accuracy %s" % (avgAccuracy / run))
示例6: do_classify
def do_classify():
    """Train a hinge-loss SGD classifier on the TF-IDF corpus and score it.

    The fitted classifier is dumped to ``sgdc_clf.pkl`` under ``HERE``.
    The documents listed in ``corpus.test_y`` are then fetched, vectorised
    with the same dictionary and TF-IDF model, and the F1 score of the
    predictions is printed.
    """
    corpus = MyCorpus()
    weighted = tfidf_model[corpus]
    num_terms = len(corpus.dictionary)
    # Transposed after conversion -- presumably so that rows are documents.
    train_matrix = matutils.corpus2csc(weighted, num_terms).transpose(copy=False)

    clf = SGDClassifier(loss="hinge", penalty="l2", alpha=0.0001,
                        n_iter=50, fit_intercept=True)
    clf.fit(train_matrix, np.array(corpus.cls_y))

    joblib.dump(clf, os.path.join(HERE, "sgdc_clf.pkl"), compress=9)
    print("train completely")

    token_strings = []
    expected = []
    for record in SogouCorpus.objects.filter(id__in=corpus.test_y):
        token_strings.append(record.tokens)
        expected.append(cls_ids[record.classify])

    # Tokens are stored as one comma-separated string per document.
    bows = [dictionary.doc2bow(tokens.split(",")) for tokens in token_strings]
    test_matrix = matutils.corpus2csc(tfidf_model[bows], num_terms).transpose(copy=False)

    predicted = clf.predict(test_matrix)
    print("f1-score: %0.3f" % metrics.f1_score(expected, predicted))
示例7: classify_reviews
def classify_reviews():
    """Compare Naive Bayes and SGD classifiers on TF-IDF review features.

    Prints, in order: the number of test reviews misclassified by
    ``MultinomialNB``, the SGD accuracy on the plain TF-IDF features, and
    the SGD accuracy on TF-IDF features built from the stemmed data.
    """
    import featurizer
    import gen_training_data
    import numpy as np
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.linear_model import SGDClassifier

    data = gen_training_data.gen_data()
    stemmed_data = featurizer.stem(data)
    tfidf = featurizer.tfidf(data)

    clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels'])
    predicted = clf.predict(tfidf['test_tfidf'])
    num_wrong = sum(
        1
        for expected, guessed in zip(data['testing_labels'], predicted)
        if expected != guessed
    )
    # The count is now actually interpolated; previously the format string
    # and the count were passed as two separate arguments.
    print("num_wrong: %d" % num_wrong)

    sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                            n_iter=5, random_state=42)
    sgd_clf.fit(tfidf['train_tfidf'], data['training_labels'])
    sgd_pred = sgd_clf.predict(tfidf['test_tfidf'])
    print(np.mean(sgd_pred == data['testing_labels']))

    # Same classifier settings, refitted on the stemmed features.
    stem_tfidf = featurizer.tfidf(stemmed_data)
    sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels'])
    sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf'])
    print(np.mean(sgd_stem_prd == data['testing_labels']))
示例8: train
def train(docs, labels, regu=1, bg_weight=.1):
    '''
    Fit a logistic-loss SGD classifier on features extracted from docs.

    :param docs: iterator of (title, body) pairs
    :param labels: integer labels for docs (0 is weakly-negative)
    :param regu: regularisation numerator; alpha is regu / number of labels
    :param bg_weight: passed to balance_weights together with the labels
    :return: dict with keys ``idf``, ``logreg`` (the fitted model) and
        ``lda`` (always None)
    '''
    # A real list, so the features can be traversed by both train_idf and
    # extract_feas even where map() returns a one-shot iterator.
    feas = [extract_words(doc) for doc in docs]
    labels = np.array(list(labels), dtype=int)
    idf = train_idf(feas)
    X, vocab = extract_feas(feas, idf)

    # Sample weights are computed from the original labels, before the
    # 0 labels are merged into class 1 below.
    weights = balance_weights(labels, bg_weight)
    labels = labels.copy()
    labels[labels == 0] = 1

    model = SGDClassifier(loss='log',
                          # float() forces true division; with the int
                          # default regu=1, Python 2 integer division
                          # would make alpha 0.
                          alpha=float(regu) / len(labels),
                          fit_intercept=True,
                          n_iter=100,
                          shuffle=True)
    model.fit(X, labels, sample_weight=weights)
    return dict(idf=idf, logreg=model, lda=None)
示例9: crossvalidate
def crossvalidate(feas, labels, param):
    """Return the mean average-precision over 10 stratified folds.

    For every fold the IDF is trained on the training documents only,
    features are extracted for all documents with that IDF, and a
    logistic-loss ``SGDClassifier`` is fitted on the training rows.
    The class-2 probabilities of the validation rows are scored with
    ``avg_precision``; each fold's score is written to stdout.

    :param feas: indexable collection of per-document features
    :param labels: integer labels, one per document
    :param param: mapping providing ``'bg_weight'`` and ``'regu'``
    :return: mean of the per-fold scores
    """
    import sys

    labels = np.array(list(labels), dtype=int)
    accs = []
    for train_ids, valid_ids in StratifiedKFold(labels, 10):
        idf = train_idf([feas[i] for i in train_ids])
        X, vocab = extract_feas(feas, idf)

        labels_train = labels[train_ids].copy()
        # Weights come from the original labels, before 0 is merged into 1.
        weights = balance_weights(labels_train, param['bg_weight'])
        labels_train[labels_train == 0] = 1

        model = SGDClassifier(loss='log',
                              # float() forces true division so an integer
                              # 'regu' cannot truncate alpha to 0 under
                              # Python 2.
                              alpha=float(param['regu']) / len(labels_train),
                              fit_intercept=True,
                              shuffle=True, n_iter=50)
        model.fit(X[train_ids], labels_train, sample_weight=weights)

        pp = model.predict_proba(X[valid_ids])
        # Column 1 must be the "good" class (label 2) for the scores below.
        assert model.classes_[1] == 2
        pred_scores = pp[:, 1]

        a = avg_precision(labels[valid_ids], pred_scores)
        # Fold scores stay on one line, separated by spaces.
        sys.stdout.write('%.2f ' % a)
        accs.append(a)
    return np.mean(accs)
示例10: plot_sgd_classifier
def plot_sgd_classifier(num_samples, clt_std):
    """Draw two blobs and the decision contours of a logistic SGD fit.

    Generates ``num_samples`` points in two clusters with standard
    deviation ``clt_std``, fits ``SGDClassifier(loss='log', alpha=0.01)``
    and plots the -1 / 0 / +1 level lines of its decision function over
    a 10 x 10 grid, together with the points coloured by label.
    The figure is not shown or saved here.
    """
    # Generate the data.
    X, y = make_blobs(n_samples=num_samples, centers=2, cluster_std=clt_std)

    # Fit with logistic loss.
    clf = SGDClassifier(loss='log', alpha=0.01)
    clf.fit(X, y)

    # Build the evaluation grid over the range of the data.
    x_ = np.linspace(X[:, 0].min(), X[:, 0].max(), 10)
    y_ = np.linspace(X[:, 1].min(), X[:, 1].max(), 10)
    X_, Y_ = np.meshgrid(x_, y_)

    # Score every grid point in one call.  decision_function expects a
    # 2-D (n_samples, n_features) array, so the grid is flattened into
    # rows of (x1, x2) and the scores are reshaped back afterwards.
    grid_points = np.column_stack((X_.ravel(), Y_.ravel()))
    Z = clf.decision_function(grid_points).reshape(X_.shape)

    levels = [-1.0, 0, 1.0]
    colors = 'k'
    linestyles = ['dashed', 'solid', 'dashed']
    ax = plt.axes()
    plt.xlabel('X1')
    plt.ylabel('X2')
    # NOTE(review): 'labels' does not look like a documented contour
    # keyword -- kept unchanged, confirm it has the intended effect.
    ax.contour(X_, Y_, Z, colors=colors,
               levels=levels, linestyles=linestyles, labels='Boundary')
    ax.scatter(X[:, 0], X[:, 1], c=y)
示例11: kernelsvm
class kernelsvm():
    """Linear SGD classifier trained on an RBF ``General_Nystroem`` feature map."""

    def __init__(self, theta0, alpha, loss_metric):
        """Store the kernel gamma, the SGD alpha and the SGD loss name."""
        self.theta0, self.alpha, self.loss_metric = theta0, alpha, loss_metric

    def fit(self, X, y, idx_SR):
        """Fit the feature map on ``X`` / ``idx_SR``, then the classifier.

        The feature map gets one component per entry of ``idx_SR``.
        """
        mapper = General_Nystroem(kernel='rbf', gamma=self.theta0,
                                  n_components=len(idx_SR))
        self.feature_map_nystroem = mapper
        projected = mapper.fit_transform(X, idx_SR)
        print("fitting SGD")
        self.clf = SGDClassifier(loss=self.loss_metric, alpha=self.alpha)
        self.clf.fit(projected, y)
        print("fitting SGD finished")

    def predict(self, X):
        """Return ``(predicted labels, transformed X)`` for raw input ``X``."""
        print("Predicting")
        projected = self.feature_map_nystroem.transform(X)
        labels = self.clf.predict(projected)
        return labels, projected

    def decision_function(self, X):
        """Return the classifier's decision values.

        ``X`` must already be transformed by the feature map.
        """
        scores = self.clf.decision_function(X)
        return scores

    def err_rate(self, y_true, y_pred):
        """Return one minus the accuracy of ``y_pred`` against ``y_true``."""
        return 1.0 - accuracy_score(y_true, y_pred)

    def get_params(self):
        """Return the parameters of the fitted SGD classifier."""
        params = self.clf.get_params()
        return params
示例12: run_SGD
def run_SGD(X, y, n_tr, n_te):
    """Fit unpenalised SGD classifiers with hinge and log loss and print scores.

    The first ``n_tr`` samples are used for training and the last
    ``n_te`` samples for validation.
    """
    X_tr, y_tr = X[:n_tr], y[:n_tr]
    X_te, y_te = X[-n_te:], y[-n_te:]
    for loss_name in ('hinge', 'log'):
        model = SGDClassifier(loss=loss_name, penalty=None, n_iter=100)
        model.fit(X_tr, y_tr)
        train_acc = model.score(X_tr, y_tr)
        valid_acc = model.score(X_te, y_te)
        print('Training, validation accuracy is %6.4f and %6.4f for %s loss'
              % (train_acc, valid_acc, loss_name))
示例13: stochasticGD
def stochasticGD(input_file,Output,test_size):
    """Train and evaluate a hinge-loss SGD classifier on a CSV file.

    Column 0 of the loaded data is the target and the remaining loaded
    columns are the features.  The data is split with ``test_size``,
    the metrics are printed and written to
    ``<Output>Stochastic_GD_metrics_test.txt`` together with one line per
    test sample, and a confusion matrix is plotted.

    :param input_file: path of the comma-separated input file
    :param Output: prefix prepended to every output file name
    :param test_size: forwarded to ``train_test_split``
    """
    lvltrace.lvltrace("LVLEntree dans stochasticGD split_test")
    ncol = tools.file_col_coma(input_file)
    # Only the first ncol-1 columns are loaded (the last one is skipped).
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:, 1:]
    y = data[:, 0]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print("%s %s" % (X_train.shape, X_test.shape))

    clf = SGDClassifier(loss="hinge", penalty="l2")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Compute each metric once and reuse it for both console and file.
    accuracy = metrics.accuracy_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred)
    recall = metrics.recall_score(y_test, y_pred)
    f1 = metrics.f1_score(y_test, y_pred)

    print("Stochastic Gradient Descent ")
    print("classification accuracy: %s" % accuracy)
    print("precision: %s" % precision)
    print("recall: %s" % recall)
    print("f1 score: %s" % f1)
    print("\n")

    results = Output+"Stochastic_GD_metrics_test.txt"
    # The context manager closes the report even if a write fails.
    with open(results, "w") as report:
        report.write("Stochastic Gradient Descent estimator accuracy\n")
        report.write("Classification Accuracy Score: %f\n" % accuracy)
        report.write("Precision Score: %f\n" % precision)
        report.write("Recall Score: %f\n" % recall)
        report.write("F1 Score: %f\n" % f1)
        report.write("\n")
        report.write("True Value, Predicted Value, Iteration\n")
        # The iteration column is 1-based.
        for n, (truth, guess) in enumerate(zip(y_test, y_pred), 1):
            report.write("%f,%f,%i\n" % (truth, guess, n))

    title = "Stochastic Gradient Descent %f"%test_size
    save = Output + "Stochastic_GD_confusion_matrix"+"_%s.png"%test_size
    plot_confusion_matrix(y_test, y_pred,title,save)
示例14: train_stochaticGradientDescent
def train_stochaticGradientDescent(X, y, loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15,
                                   fit_intercept=True, n_iter=5, shuffle=True, verbose=0,
                                   epsilon=0.1, n_jobs=1, random_state=None, learning_rate='optimal',
                                   eta0=0.0, power_t=0.5, class_weight=None, warm_start=False,
                                   average=False):
    """Build an ``SGDClassifier`` from the keyword arguments and fit it.

    Every argument after ``X`` and ``y`` is forwarded unchanged to the
    ``SGDClassifier`` constructor under the same name.

    :return: the value returned by ``fit(X, y)``
    """
    settings = dict(
        loss=loss,
        penalty=penalty,
        alpha=alpha,
        l1_ratio=l1_ratio,
        fit_intercept=fit_intercept,
        n_iter=n_iter,
        shuffle=shuffle,
        verbose=verbose,
        epsilon=epsilon,
        n_jobs=n_jobs,
        random_state=random_state,
        learning_rate=learning_rate,
        eta0=eta0,
        power_t=power_t,
        class_weight=class_weight,
        warm_start=warm_start,
        average=average,
    )
    return SGDClassifier(**settings).fit(X, y)
示例15: SGD
def SGD(x, y):
    """Fit a default ``SGDClassifier`` on ``(x, y)`` and return its predictions for ``x``."""
    # Imported locally, as in the rest of this helper's usage.
    from sklearn.linear_model import SGDClassifier

    model = SGDClassifier()
    model.fit(x, y)
    predictions = model.predict(x)
    return predictions