This article collects typical usage examples of the SGDClassifier.predict method from Python's sklearn.linear_model module. If you have been wondering what SGDClassifier.predict does, how to call it, or what real usage looks like, the curated code examples below may help. You can also explore the containing class, sklearn.linear_model.SGDClassifier, for more details.
The following shows 15 code examples of SGDClassifier.predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
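Before the collected examples, here is a minimal, self-contained sketch of the typical fit-then-predict workflow. The toy feature matrix, labels, and parameter choices below are illustrative assumptions, not taken from any of the examples that follow.

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

# Hypothetical toy data: six samples, two features, binary labels.
X = np.array([[0.0, 1.0], [1.0, 1.5], [2.0, 0.5],
              [5.0, 6.0], [6.0, 5.5], [7.0, 6.5]])
y = np.array([0, 0, 0, 1, 1, 1])

# Feature scaling is generally recommended for SGD-based linear models.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

clf = SGDClassifier(loss="hinge", penalty="l2", random_state=0)
clf.fit(X_scaled, y)

# predict returns one class label per input row.
print(clf.predict(scaler.transform([[1.0, 1.0], [6.5, 6.0]])))

As in the examples below, predict is called on data transformed with the same scaler or vectorizer that was fitted on the training set.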
Example 1: sgc_test
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def sgc_test(X, y, weight):
    from sklearn.linear_model import SGDClassifier
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler
    for i in range(0, 1):
        X_train, X_test, y_train, y_test, weight_train, weight_test = cross_validation.train_test_split(
            X, y, weight, test_size=0.2, random_state=0)
        clf = SGDClassifier(loss="hinge", n_iter=100, n_jobs=-1, penalty="l2")
        #clf = LogisticRegression( max_iter=100)
        scaler = StandardScaler(with_mean=False)
        scaler.fit(X_train)  # Don't cheat - fit only on training data
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)  # apply same transformation to test data
        clf.fit(X_train, y_train, sample_weight=weight_train)
        y_pred = clf.predict(X_train)
        #print(confusion_matrix(y_train, y_pred))
        print(clf.score(X_train, y_train, weight_train))
        y_pred = clf.predict(X_test)
        #print(confusion_matrix(y_test, y_pred))
        print(clf.score(X_test, y_test, weight_test))
Example 2: test_multi_output_classification_partial_fit
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)
    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)
    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)
    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
Example 3: algo
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def algo(a):
    global data
    global week
    target = data['target']
    data = data[["id", "cpu", "creator", "dbs", "dtype", "era", "nblk", "nevt", "nfiles", "nlumis", "nrel", "nsites", "nusers", "parent", "primds", "proc_evts", "procds", "rnaccess", "rnusers", "rtotcpu", "size", "tier", "totcpu", "wct", "naccess"]]
    week['target'] = 0
    week['target'] = week.apply(convert, axis=1)
    week['target'] = week['target'].astype(int)
    test1 = week
    week = week[["id", "cpu", "creator", "dbs", "dtype", "era", "nblk", "nevt", "nfiles", "nlumis", "nrel", "nsites", "nusers", "parent", "primds", "proc_evts", "procds", "rnaccess", "rnusers", "rtotcpu", "size", "tier", "totcpu", "wct", "naccess"]]
    if a == 'rf':
        # RANDOM FOREST CLASSIFIER
        rf = RandomForestClassifier(n_estimators=100)
        rf = rf.fit(data, target)
        predictions = rf.predict(week)
        cal_score("RANDOM FOREST", rf, predictions, test1['target'])
    if a == "sgd":
        # SGD CLASSIFIER
        clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
                            fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
                            loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5,
                            random_state=None, shuffle=True, verbose=0,
                            warm_start=False)
        clf.fit(data, target)
        predictions = clf.predict(week)
        cal_score("SGD Regression", clf, predictions, test1['target'])
    if a == "nb":
        clf = GaussianNB()
        clf.fit(data, target)
        predictions = clf.predict(week)
        cal_score("NAIVE BAYES", clf, predictions, test1['target'])
Example 4: classify_reviews
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def classify_reviews():
    import featurizer
    import gen_training_data
    import numpy as np
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.linear_model import SGDClassifier
    data = gen_training_data.gen_data();
    stemmed_data = featurizer.stem(data);
    tfidf = featurizer.tfidf(data);
    clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels']);
    predicted = clf.predict(tfidf['test_tfidf']);
    num_wrong = 0;
    tot = 0;
    for expected, guessed in zip(data['testing_labels'], predicted):
        if(expected-guessed != 0):
            num_wrong += 1;
    print("num_wrong: %d", num_wrong)
    sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42);
    _ = sgd_clf.fit(tfidf['train_tfidf'], data['training_labels']);
    sgd_pred = sgd_clf.predict(tfidf['test_tfidf']);
    print np.mean(sgd_pred == data['testing_labels']);
    stem_tfidf = featurizer.tfidf(stemmed_data);
    _ = sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels']);
    sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf']);
    print np.mean(sgd_stem_prd == data['testing_labels']);
Example 5: sgd_classifier
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def sgd_classifier(V_train, y_train, V_val, y_val, V_test, y_test):
    t0 = time.time()
    print 'Building SGD classifier model'
    clf = SGDClassifier(n_iter=50)
    #clf = grid_search.GridSearchCV(svm_clf, parameters)
    clf.fit(V_train, y_train)
    #print clf.best_params_
    t1 = time.time()
    print 'Building SGD classifier model ... Done', str(int((t1 - t0)*100)/100.)
    print ''
    p_val = clf.predict(V_val)
    print 'Training accuracy on validation set', accuracy_score(y_val, p_val)
    p_test = clf.predict(V_test)
    print 'Accuracy on testing set'
    print classification_report(y_test, p_test)
Example 6: scikit_GDS
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def scikit_GDS(x, y, X_test, y_test=None, prevmodel="yes", output=False):
    from sklearn.linear_model import SGDClassifier
    from sklearn.externals import joblib
    clf = SGDClassifier(loss="hinge", penalty="l2")
    ##
    if prevmodel != "yes":
        clf.fit(x, y)  # fit on the supplied training features and labels
        joblib.dump(clf, 'trained_GDS_model.pkl')
    else:
        clf = joblib.load('trained_GDS_model.pkl')
    if output == False:
        predictions = clf.predict(X_test)
        correctcount = 0
        totalcount = 0
        for index, each in enumerate(predictions):
            if y_test[index] == each:
                correctcount += 1
            totalcount += 1
        print str(correctcount) + " / " + str(totalcount) + " = " + str(float(correctcount)/totalcount)
    else:
        predictions = clf.predict(X_test)
        return predictions
Example 7: main
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def main(which='NB'):
    print 'reading training data'
    training_data, phrase_to_id = read_data(source='dat/train.tsv')
    print 'getting features'
    global global_features
    global adjectives
    global_features = get_features(get_all_words(training_data))
    with open('adj', 'r') as adj_file:
        for adj in adj_file:
            adjectives.append(adj.lower().rstrip())
    print 'entering switch'
    if which == 'NB':
        training_set = nltk.classify.util.apply_features(extract_features, get_phrase_list(training_data, True))
        print 'moving to classifier creation'
        start = time.clock()
        classifier = nltk.NaiveBayesClassifier.train(training_set)
        print 'classifier total time: ', str(time.clock() - start)
        #classifier = SklearnClassifier(BernoulliNB()).train(training_set)
        pickle.dump(classifier, open('classifier.pickle', 'w'))
        text = raw_input('Next test (q to quit):')
        while text != 'q':
            print classifier.classify(extract_features(text.split()))
            text = raw_input('Next test (q to quit):')
    elif which == 'SGD':
        print 'extracting features'
        training_set = nltk.classify.util.apply_features(extract_features, get_phrase_list(training_data))
        training_list = []
        for d in training_set:
            sample = []
            for k, v in d.iteritems():
                sample.append(v)
            training_list.append(sample)
        label_set = [int(tup[1]) for tup in training_data]
        print 'moving to classifier creation'
        clf = SGDClassifier(loss="hinge", penalty="l2")
        print 'moving to training'
        clf.fit(training_list, label_set)
        pickle.dump(clf, open('sgd_sent.pickle', 'w'))
        print 'moving to prediction'
        pred = []
        pred.append('i hate everything')
        pred.append('i love everything')
        pred_set = nltk.classify.util.apply_features(extract_features, pred)
        pred_list = []
        print pred_set
        for d in pred_set:
            inst_list = []
            for k, v in d.iteritems():
                inst_list.append(v)
            pred_list.append(inst_list)
        print pred_list
        print clf.predict(pred_list)
Example 8: SGD
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
class SGD(CrossDomainClassifier):
    """
    Stochastic Gradient Descent with Tfidf
    """
    def train(self, limit_data=None):
        if not hasattr(self, 'reviews'):
            print "No data loaded"
            return
        if limit_data is None:
            limit_data = len(self.reviews)
        X = self.get_bag_of_ngrams(self.reviews[:limit_data])
        self.clf = SGDClassifier(loss="modified_huber", alpha=0.001, penalty="l2").fit(X, self.labels[:limit_data])
    def __test(self, reviews, labels):
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)
        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)
        return 1 - np.mean(predicted == labels)
    def get_training_error(self):
        return self.__test(self.reviews, self.labels)
    def get_generalized_error(self):
        return self.__test(self.test_reviews, self.test_labels)
    def get_crossdomain_error(self):
        return {'twitter': self.__test(self.twitter_items, self.twitter_labels),
                'ebay': self.__test(self.ebay_items, self.ebay_labels)}
    def __get_scores(self, reviews, labels):
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)
        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)
        return precision_recall_fscore_support(labels, predicted, average='macro')
    def get_scores_training(self):
        return self.__get_scores(self.reviews, self.labels)
    def get_scores_test(self):
        return self.__get_scores(self.test_reviews, self.test_labels)
    def get_scores_twitter(self):
        return self.__get_scores(self.twitter_items, self.twitter_labels)
    def get_scores_ebay(self):
        return self.__get_scores(self.ebay_items, self.ebay_labels)
Example 9: chi_feature_select
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def chi_feature_select(train_file, test_file):
    lines = read_text_src(train_file)
    lines = [x for x in lines if len(x) > 1]
    X_train = [line[1] for line in lines]
    y_train = [line[0] for line in lines]
    lines = read_text_src(test_file)
    lines = [x for x in lines if len(x) > 1]
    X_test = [line[1] for line in lines]
    y_test = [line[0] for line in lines]
    vectorizer = TfidfVectorizer(tokenizer=zh_tokenize)  # ngram_range=(1,2)
    X_train = vectorizer.fit_transform(X_train)
    print X_train.shape
    X_test = vectorizer.transform(X_test)
    # word = vectorizer.get_feature_names()
    # N = X_train.shape[1]
    # ch2 = SelectKBest(chi2, k=int(N*0.2))  # .fit_transform(X, y)
    #
    # X_train = ch2.fit_transform(X_train, y_train)
    # X_test = ch2.transform(X_test)
    # feature_names = [word[i] for i in ch2.get_support(indices=True)]
    #
    # for i in feature_names:
    #     print i.encode('utf-8')
    # feature_names = np.asarray(feature_names)
    # print feature_names
    # clf = LinearSVC(penalty="l1", dual=False, tol=1e-3)
    # clf.fit(X_train, y_train)
    clf = SGDClassifier(loss="log", penalty='l1')
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    prob = clf.predict_proba(X_test[0])
    print prob
    X = ["市场经济复苏,互联网公司蓬勃发展", "世纪大战终于开启,勇士引得第73胜"]
    Y = ['1', '0']
    X = vectorizer.transform(X)
    clf.partial_fit(X, Y, classes=['0', '1'])
    tmpx = ['暴风科技股价大跌', "世纪大战终于开启,勇士引得第73胜"]
    tmpX = vectorizer.transform(tmpx)
    pred = clf.predict(tmpX)
    print pred
Example 10: solve
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def solve(exp, X_train, y_train, X_test, y_test, seed):
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)
    loss = exp["loss"]
    reg = exp["reg"]
    verbose = exp["verbose"]
    if (loss == "softmax"):
        y_train_pred, y_test_pred = softmax_gn(X_train, y_train, X_test, y_test, reg, verbose=True)
    else:
        clf = SGDClassifier(loss=loss, random_state=RANDOM_STATE, alpha=reg, verbose=int(verbose))
        clf.fit(X_train, y_train)
        y_train_pred = clf.predict(X_train)
        y_test_pred = clf.predict(X_test)
    return y_train_pred, y_test_pred
Example 11: run_regression
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    np.random.seed(1)
    from sklearn.linear_model import SGDClassifier
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import accuracy_score
    dummy = DummyClassifier()
    dummy.fit(train_embeds, train_labels)
    log = SGDClassifier(loss="log", n_jobs=55)
    log.fit(train_embeds, train_labels)
    print("Test scores")
    print(accuracy_score(test_labels, log.predict(test_embeds)))
    print("Train scores")
    print(accuracy_score(train_labels, log.predict(train_embeds)))
    print("Random baseline")
    print(accuracy_score(test_labels, dummy.predict(test_embeds)))
Example 12: SGDCls
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
class SGDCls(object):
    """docstring for ClassName"""
    def __init__(self):
        self.sgd_cls = SGDClassifier()
        self.prediction = None
        self.train_x = None
        self.train_y = None
    def train_model(self, train_x, train_y):
        try:
            self.train_x = train_x
            self.train_y = train_y
            print(self.train_y)
            self.sgd_cls.fit(train_x, train_y)
        except:
            print(traceback.format_exc())
    def predict(self, test_x):
        try:
            self.prediction = self.sgd_cls.predict(test_x)
            return self.prediction
        except:
            print(traceback.format_exc())
    def accuracy_score(self, test_y):
        try:
            return r2_score(test_y, self.prediction)
        except:
            print(traceback.format_exc())
Example 13: runSGDPipeline
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def runSGDPipeline(entries, langs):
    t0 = time()
    sgd_pipeline = Pipeline([('vect', CountVectorizer(ngram_range=(1, 1), max_features=n_features)),
                             ('tfidf', TfidfTransformer(use_idf=True)),
                             ('clf', SGDClassifier(loss='squared_hinge', penalty='l2',
                                                   alpha=0.001, n_iter=5, random_state=42))])
    vect = CountVectorizer(ngram_range=(1, 1), max_features=n_features)
    X_train_counts = vect.fit_transform(entries)
    tfidf = TfidfTransformer(use_idf=True).fit(X_train_counts)
    X_train_tfidf = tfidf.fit_transform(X_train_counts)
    clf = SGDClassifier(loss='squared_hinge', penalty='l2', alpha=0.001, n_iter=5, random_state=42)
    clf.fit(X_train_tfidf, langs)
    X_new_counts = vect.transform(entries)
    X_new_tfidf = tfidf.transform(X_new_counts)
    predicted = clf.predict(X_new_tfidf.toarray())
    print(np.mean(predicted == langs))
    print(metrics.classification_report(langs, predicted, target_names=langs))
    print(metrics.confusion_matrix(langs, predicted))
    print("Took %s seconds." % (time()-t0))
    print("n_samples: %d, n_features: %d" % X_train_tfidf.shape)
    return sgd_pipeline
Example 14: kernelsvm
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
class kernelsvm():
    def __init__(self, theta0, alpha, loss_metric):
        self.theta0 = theta0
        self.alpha = alpha
        self.loss_metric = loss_metric
    def fit(self, X, y, idx_SR):
        n_SR = len(idx_SR)
        self.feature_map_nystroem = General_Nystroem(kernel='rbf', gamma=self.theta0, n_components=n_SR)
        X_features = self.feature_map_nystroem.fit_transform(X, idx_SR)
        print("fitting SGD")
        self.clf = SGDClassifier(loss=self.loss_metric, alpha=self.alpha)
        self.clf.fit(X_features, y)
        print("fitting SGD finished")
    def predict(self, X):
        print("Predicting")
        X_transform = self.feature_map_nystroem.transform(X)
        return self.clf.predict(X_transform), X_transform
    def decision_function(self, X):
        # X should be the transformed input!
        return self.clf.decision_function(X)
    def err_rate(self, y_true, y_pred):
        acc = accuracy_score(y_true, y_pred)
        err_rate = 1.0 - acc
        return err_rate
    def get_params(self):
        return self.clf.get_params()
Example 15: SGD_lassifier
# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict [as alias]
def SGD_lassifier(X_train, categories, X_test, test_categories):
    from sklearn.linear_model import SGDClassifier
    from sklearn.metrics import confusion_matrix
    clf = SGDClassifier(alpha=.0001, n_iter=50).fit(X_train, categories)
    y_pred = clf.predict(X_test)
    print '\n Here is the classification report for SGD classifier:'
    print metrics.classification_report(test_categories, y_pred)