This page collects typical usage examples of the score method of Python's sklearn.linear_model.SGDClassifier. If you have been wondering what SGDClassifier.score does, how to call it, or what real usage looks like, the curated examples below should help. You can also read further about the method's class, sklearn.linear_model.SGDClassifier.
The following 15 code examples of SGDClassifier.score are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
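As a quick orientation before the examples: score(X, y, sample_weight=None) returns the mean accuracy of the classifier's predictions on X against the true labels y. Below is a minimal, self-contained sketch on synthetic data; the dataset and variable names are purely illustrative, and note that recent scikit-learn spells the logistic loss 'log_loss' (older releases used 'log', as several examples below do).

# Minimal sketch of SGDClassifier.score on synthetic data (illustrative only).
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = SGDClassifier(loss="log_loss", random_state=0)  # logistic regression fit by SGD
clf.fit(X_train, y_train)

# score(X, y) is the mean accuracy of clf.predict(X) against y
print("Test accuracy: %.3f" % clf.score(X_test, y_test))

# An optional sample_weight array weights each sample's contribution
w = np.ones_like(y_test, dtype=float)
print("Weighted accuracy: %.3f" % clf.score(X_test, y_test, sample_weight=w))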
Example 1: train_test_bow
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train_test_bow(ngram_order, batch_size=128, n_epoch=3):
    label_sets = ['full', 'function', '3way', 'in_out', 'man_nat']
    for label_set in label_sets:
        # need to drop unk for full/function
        if label_set in ['full', 'function']:
            df = sentences_df(labels=label_set, drop_unk=True)
        else:
            df = sentences_df(SENTENCES_CSV, labels=label_set, drop_unk=False)
        X, y, word2idx, l_enc = load_dataset(df, ngram_order=ngram_order)
        print("X shape: %s" % (X.shape,))
        print("y shape: %s" % (y.shape,))
        # StratifiedKFold(y, n_folds=10, ...) in older scikit-learn
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
        scores = []
        for train, test in skf.split(X, y):
            # a fresh classifier for every fold
            clf = SGDClassifier(loss='log_loss',  # 'log' in older scikit-learn
                                alpha=0.001,
                                l1_ratio=0,
                                random_state=0)
            for epoch in range(n_epoch):
                X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]
                n_batches = X_train.shape[0] // batch_size
                for minibatch_idx in range(n_batches):
                    clf.partial_fit(
                        X_train[minibatch_idx * batch_size:(minibatch_idx + 1) * batch_size],
                        y_train[minibatch_idx * batch_size:(minibatch_idx + 1) * batch_size],
                        classes=np.unique(y))
                print("Epoch: %d/%d Train acc: %.4f"
                      % (epoch + 1, n_epoch, clf.score(X_train, y_train)))
            fold_score = clf.score(X_test, y_test)
            print("Fold acc: %.4f" % fold_score)
            scores.append(fold_score)
        print('%s label mean cv accuracy: %.4f\n' % (label_set, np.mean(scores)))
Example 2: evaluate_svm
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def evaluate_svm(alpha):
    # Note: scikit-learn runs a single pass over the data on each partial_fit() call,
    # so the max_iter setting here only matters for the eta0 pretest (which uses fit()).
    basic_svm = SGDClassifier(loss="hinge", penalty="l2", l1_ratio=0.0, random_state=31337,
                              n_jobs=5, max_iter=5, alpha=alpha)  # n_iter=5 in older scikit-learn
    learning_rate_grid = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7]
    pretest_svm = GridSearchCV(basic_svm,
                               {"learning_rate": ["constant"],
                                "eta0": learning_rate_grid}).fit(X_pretest, y_pretest)
    bottou_gamma0 = pretest_svm.best_params_["eta0"]
    basic_svm.eta0 = bottou_gamma0
    basic_svm.learning_rate = "constant"
    basic_svm = basic_svm.partial_fit(X_pretest, y_pretest, classes=np.unique(y_train))
    progressive_val = []
    train_score = []
    for dp in range(0, X_train.shape[0], batch_size):
        t = dp + n_pretest
        # Bottou's learning-rate schedule: eta_t = eta_0 / (1 + eta_0 * alpha * t)
        basic_svm.eta0 = bottou_gamma0 / (1 + bottou_gamma0 * alpha * t)
        X_batch = X_train[dp:dp + batch_size]
        y_batch = y_train[dp:dp + batch_size]
        # progressive validation: score each batch before training on it
        progressive_val.append(basic_svm.score(X_batch, y_batch))
        basic_svm = basic_svm.partial_fit(X_batch, y_batch)
        train_score.append(basic_svm.score(X_batch, y_batch))
    scores = progressive_val[-batches_for_cv_performance:]
    return np.mean(scores), np.std(scores), basic_svm
Example 3: run_SGD
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def run_SGD(X, y, n_tr, n_te):
    X_tr, y_tr, X_te, y_te = X[:n_tr], y[:n_tr], X[-n_te:], y[-n_te:]
    losses = ['hinge', 'log_loss']  # these are losses, not penalties; 'log' in older scikit-learn
    for loss in losses:
        model = SGDClassifier(loss=loss, penalty=None, max_iter=100).fit(X_tr, y_tr)
        print('Training, validation accuracy is %6.4f and %6.4f for %s loss'
              % (model.score(X_tr, y_tr), model.score(X_te, y_te), loss))
Example 4: sgc_test
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def sgc_test(X, y, weight):
    from sklearn.linear_model import SGDClassifier
    from sklearn.model_selection import train_test_split  # sklearn.cross_validation in older versions
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler
    for i in range(0, 1):  # single split; raise the upper bound for repeated runs
        X_train, X_test, y_train, y_test, weight_train, weight_test = train_test_split(
            X, y, weight, test_size=0.2, random_state=0)
        clf = SGDClassifier(loss="hinge", max_iter=100, n_jobs=-1, penalty="l2")  # n_iter in older scikit-learn
        #clf = LogisticRegression(max_iter=100)
        scaler = StandardScaler(with_mean=False)
        scaler.fit(X_train)  # Don't cheat - fit only on training data
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)  # apply same transformation to test data
        clf.fit(X_train, y_train, sample_weight=weight_train)
        y_pred = clf.predict(X_train)
        #print(confusion_matrix(y_train, y_pred))
        print(clf.score(X_train, y_train, weight_train))
        y_pred = clf.predict(X_test)
        #print(confusion_matrix(y_test, y_pred))
        print(clf.score(X_test, y_test, weight_test))
Example 5: crossvalidation
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def crossvalidation(self, rawX, Y):
    trainF = self.genfeature(rawX)
    # cross_validation.train_test_split in older scikit-learn (now sklearn.model_selection)
    X_train, X_test, y_train, y_test = train_test_split(trainF, Y, test_size=0.4, random_state=0)
    clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
    print('svc linear', clf.score(X_test, y_test), clf.coef_)
    clf = SGDClassifier(loss="hinge", penalty="l2").fit(X_train, y_train)
    print('SGDC hinge/l2', clf.score(X_test, y_test), clf.coef_)
    clf = neighbors.KNeighborsClassifier(5, weights='uniform').fit(X_train, y_train)
    print('KNN 5/uniform', clf.score(X_test, y_test))
Example 6: create_classifier
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def create_classifier(self):
    DB.db.connect()
    clf = SGDClassifier(loss="modified_huber")
    labs_map = NameToIndex()
    with DB.db.transaction():
        offset = 0
        words_count = self.get_words_count()
        classes = numpy.arange(0, words_count)
        x_all = []
        y_all = []
        while True:
            print(' %d partial_fit %d' % (time(), offset))
            query = DB.Vocabulary\
                .select(DB.Vocabulary.lv1, DB.Vocabulary.lv2)\
                .join(DB.PcaModel, on=(DB.Vocabulary.feature == DB.PcaModel.feature))\
                .order_by(DB.Vocabulary.feature).offset(offset).limit(1000)\
                .tuples().iterator()
            features = numpy.array([[x[0]] + list(x[1]) for x in query])  # map() under Python 2
            offset += len(features)
            if len(features) == 0:
                break
            Y = features[:, 0]
            X = features[:, 1:]
            labs = []
            for lab in Y:
                labs.append(labs_map.map(lab))
            # keep the first ~10000 samples around for a running accuracy check
            if len(x_all) < 10000:
                x_all = x_all + X.tolist()
                y_all = y_all + labs
            labs = numpy.array(labs)
            #clf = LinearSVC()
            #clf = OneVsRestClassifier(SVC(probability=True, kernel='linear'))
            #clf.fit(X, labs)
            clf.partial_fit(X, labs, classes)
            print(clf.score(x_all, y_all))
        DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__ + "_clf").execute()
        DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__ + "_labs_map").execute()
        tr = DB.TrainingResult()
        tr.name = self.__class__.__name__ + "_clf"
        tr.data = clf
        tr.save()
        tr = DB.TrainingResult()
        tr.name = self.__class__.__name__ + "_labs_map"
        tr.data = labs_map
        tr.save()
Example 7: train_and_pickle_classifier
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train_and_pickle_classifier():
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    clf = SGDClassifier(loss='log_loss', random_state=1, max_iter=1)  # loss='log', n_iter=1 in older scikit-learn
    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))
    clf = clf.partial_fit(X_test, y_test)
    pickle.dump(clf, open(CLF_FILENAME, 'wb'), protocol=4)
Example 8: buildModel
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def buildModel(size):
    with open('Sentiment Analysis Dataset.csv', 'r', newline='') as csvfile:  # mode 'rb' under Python 2
        pos_tweets = []
        neg_tweets = []
        spamreader = csv.reader(csvfile, delimiter=',')
        for row in spamreader:
            if row[1] == '1':
                if not (len(pos_tweets) > size):
                    pos_tweets.append(_cleanTweet(row[3]))
            else:
                if not (len(neg_tweets) > size):
                    neg_tweets.append(_cleanTweet(row[3]))
    y = np.concatenate((np.ones(len(pos_tweets[0:size])), np.zeros(len(neg_tweets[0:size]))))
    x_train, x_test, y_train, y_test = train_test_split(
        np.concatenate((pos_tweets[0:size], neg_tweets[0:size])), y, test_size=0.2)
    x_train = _cleanText(x_train)
    x_test = _cleanText(x_test)
    n_dim = 100
    # Initialize model and build vocab (vector_size= was size= in gensim < 4)
    imdb_w2v = Word2Vec(vector_size=n_dim, min_count=10)
    imdb_w2v.build_vocab(x_train)
    imdb_w2v.train(x_train, total_examples=imdb_w2v.corpus_count, epochs=imdb_w2v.epochs)
    train_vecs = np.concatenate([buildWordVector(z, n_dim, imdb_w2v) for z in x_train])
    train_vecs = scale(train_vecs)
    # Train word2vec on test tweets
    imdb_w2v.train(x_test, total_examples=len(x_test), epochs=imdb_w2v.epochs)
    # Build test tweet vectors then scale
    test_vecs = np.concatenate([buildWordVector(z, n_dim, imdb_w2v) for z in x_test])
    test_vecs = scale(test_vecs)
    lr = SGDClassifier(loss='log_loss', penalty='l1')  # loss='log' in older scikit-learn
    lr.fit(train_vecs, y_train)
    imdb_w2v.save("imdb_w2v")
    with open("Accuracy.txt", "w") as f:
        f.write(str(lr.score(test_vecs, y_test)) + " " + str(size * 2))
Example 9: test_create_model
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def test_create_model(self):
    print("labeled sentence worked?")
    x_train = labelizeReviews(self.xTrain, 'TRAIN')
    x_test = labelizeReviews(self.xTest, 'TEST')
    # vector_size= was size= in gensim < 4
    model_dm = gensim.models.Doc2Vec(min_count=1, window=5, vector_size=self.size,
                                     sample=1e-3, negative=5, workers=3)
    model_dbow = gensim.models.Doc2Vec(min_count=1, window=6, vector_size=self.size,
                                       sample=1e-3, negative=5, dm=0, workers=3)
    sentences = x_train
    model_dm.build_vocab(sentences)
    model_dbow.build_vocab(sentences)
    # npArray = np.array(x_train)
    for epoch in range(10):
        print("Starting epoch:", str(epoch))
        # perm = np.random.permutation(npArray.shape[0])
        # shuffle the document order each epoch; gensim >= 4 requires total_examples and epochs
        model_dm.train(random.sample(sentences, len(sentences)),
                       total_examples=len(sentences), epochs=1)
        model_dbow.train(random.sample(sentences, len(sentences)),
                         total_examples=len(sentences), epochs=1)
        # model_dm.train(x_train)
    train_vecs = getVecs(model_dm, x_train, self.size)
    train_vecs_dbow = getVecs(model_dbow, x_train, self.size)
    train_vecs_total = np.hstack((train_vecs, train_vecs_dbow))
    sentences = x_test
    for epoch in range(10):
        print("Starting epoch:", str(epoch))
        model_dm.train(random.sample(sentences, len(sentences)),
                       total_examples=len(sentences), epochs=1)
        model_dbow.train(random.sample(sentences, len(sentences)),
                         total_examples=len(sentences), epochs=1)
    test_vecs = getVecs(model_dm, x_test, self.size)  # the original passed x_train here, apparently a bug
    test_vecs_dbow = getVecs(model_dbow, x_test, self.size)
    test_vecs_total = np.hstack((test_vecs, test_vecs_dbow))
    lr = SGDClassifier(loss='log_loss', penalty='l1')  # loss='log' in older scikit-learn
    lr.fit(train_vecs_total, self.labelsTrain[:self.samples])
    print('Test Accuracy: %.2f' % lr.score(test_vecs_total, self.labelsTest[:self.samples]))
Example 10: run_online_classifier
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def run_online_classifier():
    vect = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    clf = SGDClassifier(loss='log_loss', random_state=1, max_iter=1)  # loss='log', n_iter=1 in older scikit-learn
    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))
    clf = clf.partial_fit(X_test, y_test)
Example 11: apply_minibatch_sgd
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def apply_minibatch_sgd(datasets, minibatch, epoch=5, cores=1, seed=1):
    ''' Applies the logistic regression sgd method

    :type datasets: list
    :param datasets: List containing training/testing data
    :type minibatch: int
    :param minibatch: minibatch size
    :type epoch: int
    :param epoch: Number of passes over the training data
    :type cores: int
    :param cores: Number of cores
    :type seed: int
    :param seed: Random seed
    '''
    print('Applying mini-batch SGD with mini-batch size of', minibatch)
    training_X, training_y = datasets[0]
    testing_X, testing_y = datasets[1]
    print('Shuffling training data')
    training_X, training_y = shuffle(training_X, training_y, random_state=seed)
    clf = SGDClassifier(loss="log_loss", random_state=seed, max_iter=epoch,
                        verbose=0, n_jobs=cores)  # loss="log", n_iter=epoch in older scikit-learn
    classes = numpy.unique([-1, 1])
    minibatches = training_X.shape[0] // minibatch + 1  # floor division ('/' under Python 2)
    samples = training_X.shape[0]
    for i in range(epoch):  # xrange under Python 2
        print("Epoch", i + 1)
        for j in range(minibatches):
            clf.partial_fit(training_X[j * minibatch:min(samples, (j + 1) * minibatch)],
                            training_y[j * minibatch:min(samples, (j + 1) * minibatch)],
                            classes=classes)
    print("Accuracy on testing data:", clf.score(testing_X, testing_y))
Example 12: train
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train():
    vect = HashingVectorizer(decode_error='ignore',
                             n_features=2**21,
                             preprocessor=None,
                             ngram_range=(1, 3),
                             tokenizer=tokenizer)
    clf = SGDClassifier(loss='log_loss', random_state=1, max_iter=1)  # loss='log', n_iter=1 in older scikit-learn
    stream_path = os.path.join(work_path, 'movie_data.csv')
    doc_stream = stream_docs(path=stream_path)
    pbar = pyprind.ProgBar(45)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
        pbar.update()
    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test, y_test))
    clf = clf.partial_fit(X_test, y_test)
    return clf
Example 13: SGD
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def SGD(X, Y, idealArray):
    print("now try SGD classifier")
    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(loss="hinge", penalty="l2")
    clf.fit(X, Y)
    print("clf works", clf)
    #print(clf.coef_)
    #print(clf.score(shortData, shortLabels))
    #gives 0.908 accuracy!
    #print(clf.score(fullData, fullLabels))
    #gives 0.8234 accuracy
    print(clf.score(X, Y))
    smallData = shortListGen(idealArray, fullData[0:1000])  # fullData and shortLabels are module-level globals
    print("got it")
    print(clf.score(smallData, shortLabels))
Example 14: mine
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def mine():
    print("Starting")
    clf = SGDClassifier(loss='log_loss', random_state=1, max_iter=1)  # loss='log', n_iter=1 in older scikit-learn
    print('Create/Load Classifier')
    doc_stream = stream_docs(path='./movie_data.csv')
    print('Fitting data')
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    print('Finished Fitting')
    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test, y_test))
    print('create pickle objects')
    dest = os.path.join('', 'pkl_objects')
    if not os.path.exists(dest):
        os.makedirs(dest)
    pickle.dump(stop, open(os.path.join(dest, 'stopwords.pkl'), 'wb'), protocol=4)
    pickle.dump(clf, open(os.path.join(dest, 'classifier.pkl'), 'wb'), protocol=4)
Example 15: SGDC_SVM_Classifier
# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def SGDC_SVM_Classifier(X_train, X_cv, X_test, Y_train, Y_cv, Y_test, Actual_DS):
    print("***************Starting SVM***************")
    t0 = time()
    # despite the name, loss='log_loss' is logistic regression, not a true SVM,
    # but it supports predict_proba; loss='log', n_iter=100 in older scikit-learn
    clf = SGDClassifier(loss='log_loss', penalty='l2', alpha=1e-5, max_iter=100)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv, Y_cv)
    print("{0:.2f}%".format(100 * score))
    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                          rownames=['actual'], colnames=['preds'])
    Summary['pct'] = (Summary.divide(Summary.sum(axis=1), axis=1)).max(axis=1) * 100
    print(Summary)
    # Check with log loss function
    epsilon = 1e-15
    #ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2 = log_loss(Y_cv, preds2, normalize=True)  # eps=1e-15 in older scikit-learn
    print(ll_output2)
    print("done in %0.3fs" % (time() - t0))
    preds3 = clf.predict_proba(X_test)
    #preds4 = clf.predict_proba(Actual_DS.ix[:, 'feat_1':])
    preds4 = clf.predict_proba(Actual_DS)
    print("***************Ending SVM***************")
    return pd.DataFrame(preds2), pd.DataFrame(preds3), pd.DataFrame(preds4)