本文整理汇总了Python中sklearn.linear_model.SGDClassifier.partial_fit方法的典型用法代码示例。如果您正苦于以下问题:Python SGDClassifier.partial_fit方法的具体用法?Python SGDClassifier.partial_fit怎么用?Python SGDClassifier.partial_fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.SGDClassifier
的用法示例。
在下文中一共展示了SGDClassifier.partial_fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def train():
    """Incrementally train a logistic-regression sentiment classifier.

    Streams the movie-review CSV in mini-batches, hashing each document
    into a fixed 2**21-dimensional feature space so the full corpus never
    has to fit in memory, then scores on (and also learns from) a final
    held-out batch.

    Returns the fitted SGDClassifier.
    """
    # Hashing is stateless: no vocabulary has to be kept in memory.
    vectorizer = HashingVectorizer(decode_error='ignore',
                                   n_features=2**21,
                                   preprocessor=None,
                                   ngram_range=(1, 3),
                                   tokenizer=tokenizer)
    model = SGDClassifier(loss='log', random_state=1, n_iter=1)
    doc_stream = stream_docs(path=os.path.join(work_path, 'movie_data.csv'))
    progress = pyprind.ProgBar(45)
    label_space = np.array([0, 1])
    for _ in range(45):
        texts, labels = get_minibatch(doc_stream, size=1000)
        if not texts:
            break
        # classes= must be passed so partial_fit works before every
        # label has been observed.
        model.partial_fit(vectorizer.transform(texts), labels,
                          classes=label_space)
        progress.update()
    # Evaluate on the next 5000 documents ...
    texts, labels = get_minibatch(doc_stream, size=5000)
    held_out = vectorizer.transform(texts)
    print('Accuracy: %.3f' % model.score(held_out, labels))
    # ... then fold them into the model as well before returning it.
    model = model.partial_fit(held_out, labels)
    return model
示例2: test_multi_output_classification_partial_fit
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def test_multi_output_classification_partial_fit():
    """Check MultiOutputClassifier.partial_fit matches per-column fitting.

    NOTE(review): relies on module-level fixtures X, y, classes, n_samples
    and n_outputs defined elsewhere in the test module; `classes` is
    presumably a per-output list of class arrays -- confirm against setup.
    """
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict
    sgd_linear_clf = SGDClassifier(loss='log', random_state=1)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)
    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)
    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)
    # Second half without classes=: only allowed after the first call.
    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)
    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state (unfitted, same params/seed)
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
示例3: train_and_pickle_classifier
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def train_and_pickle_classifier():
    """Train a streaming logistic-regression model and pickle it.

    Consumes up to 45 mini-batches of 1000 reviews for training, scores
    the model on the next 5000, folds those into the model as well, and
    writes the result to CLF_FILENAME.
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)
    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))
    clf = clf.partial_fit(X_test, y_test)
    # BUGFIX: the file handle returned by open() was never closed; use a
    # context manager so the pickle is flushed and closed deterministically.
    with open(CLF_FILENAME, 'wb') as pkl_file:
        pickle.dump(clf, pkl_file, protocol=4)
示例4: __init__
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
class LightModel:
    """Streaming binary classifier wrapping a logistic-loss SGDClassifier.

    NOTE(review): relies on module-level imports of np and datetime, and on
    the scoring helper llfun, all defined elsewhere in this file.
    """
    def __init__(self,learningRate, numEpochs, ppenalty="l1", mustShuffle=True):
        # Init scikit models.
        # NOTE(review): learningRate is passed as alpha, which sklearn uses
        # as the regularization strength, not the step size -- confirm intent.
        self.Classifier = SGDClassifier(penalty=ppenalty, loss='log', alpha=learningRate, n_iter = numEpochs, shuffle=mustShuffle)

    def train(self, gen, v=False):
        """Incrementally fit on (x, y) batches yielded by generator `gen`.

        v=True prints a timestamped progress line after each batch.
        """
        i = 0
        for x, y in gen: #For each batch
            # Classes fixed to {0, 1} so partial_fit may be called before
            # both labels have been observed in the stream.
            self.Classifier.partial_fit(x, y, [0,1])
            i += len(x)
            if v : print(str(datetime.now())[:-7] , "example:", i)

    def test(self, gen, v=False):
        """Predict P(class=1) for every batch; return (targets, predictions)."""
        #init target and prediction arrays
        ytot = np.array([])
        ptot = np.array([])
        #Get prediction for each batch
        i = 0
        for x,y in gen:
            p = self.Classifier.predict_proba(x)
            p = p.T[1].T #Keep column corresponding to probability of class 1
            #Stack target and prediction for later analysis
            ytot = np.hstack((ytot, y))
            ptot = np.hstack((ptot, p))
            i += y.shape[0]
            if v : print(str(datetime.now())[:-7] , "example:", i)
        if v: print("Score:", self.score(ytot, ptot))
        return (ytot, ptot)

    def score(self, target, prediction):
        """Delegate to llfun (a log-loss-style metric defined elsewhere)."""
        return llfun(target, prediction)
示例5: mine
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def mine():
    """Train a streaming sentiment classifier and pickle the artifacts.

    Fits a logistic-regression SGD model on mini-batches of reviews from
    ./movie_data.csv, reports accuracy on a held-out batch, then saves the
    stopword list and the classifier under ./pkl_objects for later reuse.
    """
    print("Starting")
    model = SGDClassifier(loss='log',random_state=1,n_iter=1)
    print('Create/Load Classifier')
    stream = stream_docs(path='./movie_data.csv')
    print('Fitting data')
    both_labels = np.array([0,1])
    for _ in range(45):
        docs, labels = get_minibatch(stream, size=1000)
        if not docs:
            break
        model.partial_fit(vect.transform(docs), labels, classes=both_labels)
    print('Finished Fitting')
    docs, labels = get_minibatch(stream, size=5000)
    held_out = vect.transform(docs)
    print('Accuracy: %.3f' % model.score(held_out,labels))
    print('create pickle objects')
    dest = os.path.join('','pkl_objects')
    if not os.path.exists(dest):
        os.makedirs(dest)
    # protocol=4 matches the format expected by the loading side.
    pickle.dump(stop, open(os.path.join(dest,'stopwords.pkl'),'wb'), protocol=4)
    pickle.dump(model, open(os.path.join(dest,'classifier.pkl'),'wb'), protocol=4)
示例6: apply_minibatch_sgd
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def apply_minibatch_sgd(datasets, minibatch, epoch=5, cores=1, seed=1):
''' Applies the logistic regression sgd method
:type datasets: list
:param datasets: List containing training/testing data
:type minibatch: int
:param minibatch: minibatch size
:type cores: int
:param cores: Number of cores
:type seed: int
:param seed: Random seed
'''
print 'Applying mini-batch SGD with mini-batch size of ', minibatch
training_X, training_y = datasets[0]
testing_X, testing_y = datasets[1]
print 'Shuffling training data'
training_X, training_y = shuffle(training_X, training_y, random_state = seed)
clf = SGDClassifier(loss="log", random_state=seed, n_iter=epoch, verbose=0, n_jobs=cores)
classes = numpy.unique([-1, 1])
minibatches = training_X.shape[0]/minibatch + 1
samples = training_X.shape[0]
for i in xrange(epoch):
print "Epoch ", i+1
for j in xrange(minibatches):
clf.partial_fit(training_X[j*minibatch:min(samples,(j+1)*minibatch)], training_y[j*minibatch:min(samples,(j+1)*minibatch)], classes=classes)
print "Accuracy on testing data:", clf.score(testing_X, testing_y)
示例7: test_transformer
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def test_transformer(transformer, data_set, configuration):
    """Fit a linear probe on transformer outputs and log its test errors.

    Accumulates transformed training samples in groups of ten and
    partial-fits an SGD classifier on each group, then counts test
    mistakes.  Assumes samples arrive in repeating label order 0..9
    (true label of sample i is i % 10) -- TODO confirm the data ordering.
    """
    clf = SGDClassifier(alpha=0.005)
    samples = []
    # Python 2: range(10) is a list; doubles as both y and classes below.
    labels = range(10)
    for epoch in range(configuration.hyper_parameters.epochs):
        for index, sample in enumerate(transformer.compute_outputs(data_set.trainset[0], data_set.trainset[1], 1)):
            # Reshape each output vector to a single-row 2-D sample.
            samples.append(sample.reshape((1, sample.shape[0])))
            if index % 10 == 9:
                # Third positional argument is `classes`.
                clf.partial_fit(samples, labels, labels)
                samples = []
                # Free the transformed batch eagerly to bound memory use.
                gc.collect()
    error = 0
    count = 0
    test_predictions = []
    for index, sample in enumerate(transformer.compute_outputs(data_set.testset[0], data_set.testset[1], 1)):
        prediction = clf.predict(sample)
        if not prediction == index % 10:
            error += 1
        count += 1
        test_predictions.append(prediction)
    OutputLog().write('test predictions weight: {0}'.format(test_predictions))
    # NOTE(review): `error` is a raw mistake count, not a percentage,
    # despite the %% in the format string -- confirm the intended units.
    OutputLog().write('\nerror: %f%%\n' % error)
示例8: run_online_classifier
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def run_online_classifier():
    """Out-of-core training: hash-vectorize review batches, fit, and score.

    Streams up to 45 mini-batches of 1000 documents into an SGD
    logistic-regression model, evaluates on the next 5000, then updates
    the model with those as well.
    """
    hasher = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    model = SGDClassifier(loss='log', random_state=1, n_iter=1)
    stream = stream_docs(path=os.path.join('datasets', 'movie_data.csv'))
    label_space = np.array([0, 1])
    for _ in range(45):
        docs, labels = get_minibatch(stream, size=1000)
        if docs is None:
            break
        model.partial_fit(hasher.transform(docs), labels, classes=label_space)
    docs, labels = get_minibatch(stream, size=5000)
    held_out = hasher.transform(docs)
    print("Test accuracy: %.3f" % model.score(held_out, labels))
    model = model.partial_fit(held_out, labels)
示例9: evaluate_svm
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def evaluate_svm(alpha):
    """Pretest eta0 by grid search, then stream-train a hinge-loss SVM.

    Returns (mean, std) of progressive-validation accuracy over the last
    `batches_for_cv_performance` batches, plus the fitted model.

    NOTE(review): relies on module-level X_pretest, y_pretest, X_train,
    y_train, batch_size, n_pretest and batches_for_cv_performance defined
    elsewhere in this file.
    """
    # Note: n_iter gets switched to 1 by sklearn whenever you call partial_fit(). This initial
    # setting is for the pretesting of eta0.
    basic_svm = SGDClassifier(loss="hinge", penalty="l2", l1_ratio=0.0, random_state=31337, n_jobs=5,
                              n_iter=5, alpha=alpha)
    learning_rate_grid = [ 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7 ]
    pretest_svm = GridSearchCV(basic_svm,
                               {"learning_rate": ["constant"],
                                "eta0": learning_rate_grid}).fit(X_pretest, y_pretest)
    bottou_gamma0 = pretest_svm.best_params_["eta0"]
    basic_svm.eta0 = bottou_gamma0
    basic_svm.learning_rate = "constant"
    # Warm-start on the pretest data; classes= is required on the first call.
    basic_svm = basic_svm.partial_fit(X_pretest, y_pretest, classes = np.unique(y_train))
    progressive_val = []
    train_score = []
    for dp in range(0, X_train.shape[0], batch_size):
        # t counts total examples seen so far, including the pretest set.
        t = dp + n_pretest
        # Bottou-style 1/(1 + gamma0*alpha*t) decay, applied manually
        # because learning_rate is pinned to "constant".
        basic_svm.eta0 = bottou_gamma0/(1 + bottou_gamma0*alpha*t)
        X_batch = X_train[dp:dp+batch_size]
        y_batch = y_train[dp:dp+batch_size]
        # Score BEFORE fitting: progressive (prequential) validation.
        progressive_val.append(basic_svm.score(X_batch, y_batch))
        basic_svm = basic_svm.partial_fit(X_batch, y_batch)
        # Score AFTER fitting: training accuracy on the same batch.
        train_score.append(basic_svm.score(X_batch, y_batch))
    scores = progressive_val[-batches_for_cv_performance:]
    return np.mean(scores), np.std(scores), basic_svm
示例10: SGDRanker
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
class SGDRanker(BaseEstimator):
    """Pairwise ranking model trained by time-budgeted SGD.

    Repeatedly samples pairs of rows with differing targets and feeds the
    feature difference, labelled with the sign of the target difference,
    into a hinge-loss SGDClassifier until the time budget expires.

    TODO:
        -allow configurable parameters for classifier
        -seed random state
    """
    def __init__(self, seconds=10):
        self.clf = SGDClassifier(loss='hinge')
        # No intercept: scoring is a pure dot product with the weights.
        self.clf.fit_intercept = False
        self.clf.classes_ = np.array([-1, 1])
        self.seconds = seconds

    def fit(self, X, y):
        """Train on random row pairs until `self.seconds` have elapsed."""
        n_rows = X.shape[0]
        deadline = time.time() + self.seconds
        while time.time() <= deadline:
            a = random.randint(0, n_rows - 1)
            b = random.randint(0, n_rows - 1)
            if y[a] == y[b]:
                # Equal targets carry no ordering signal; resample.
                continue
            self.clf.partial_fit(X[a] - X[b], np.sign(y[a] - y[b]))
        return self

    def predict(self, X):
        """Return ranking scores as the dot product of X with the weights."""
        return np.dot(X, self.clf.coef_.T)
示例11: train_test_bow
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def train_test_bow(ngram_order, batch_size=128, n_epoch=3):
    """10-fold CV of a mini-batch logistic-regression bag-of-words model.

    Runs once per label scheme in `label_sets`, printing per-epoch training
    accuracy, per-fold test accuracy and the mean CV accuracy.

    NOTE(review): depends on sentences_df, load_dataset, SENTENCES_CSV and
    sklearn helpers imported elsewhere in this module.
    """
    label_sets = ['full', 'function', '3way', 'in_out', 'man_nat']
    for label_set in label_sets:
        # need to drop unk for full/function
        if label_set in ['full', 'function']:
            df = sentences_df(labels=label_set, drop_unk=True)
        else:
            df = sentences_df(SENTENCES_CSV, labels=label_set, drop_unk=False)
        X, y, word2idx, l_enc = load_dataset(df, ngram_order=ngram_order)
        print "X shape: %s" % (X.shape,)
        print "y shape: %s" % (y.shape,)
        skf = StratifiedKFold(y, n_folds=10, shuffle=True, random_state=0)
        scores = []
        for (train, test) in skf:
            # Fresh classifier per fold.
            clf = None
            clf = SGDClassifier(loss='log',
                                alpha=0.001,
                                l1_ratio=0,
                                random_state=0)
            for epoch in range(n_epoch):
                X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]
                n_batches = X_train.shape[0] // batch_size
                # NOTE(review): the trailing partial batch (< batch_size
                # samples) is never used for training -- confirm intended.
                for minibatch_idx in range(n_batches):
                    clf.partial_fit(
                        X_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        y_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        classes=np.unique(y))
                print "Epoch: %d/%d Train acc: %.4f" \
                    % (epoch+1, n_epoch, clf.score(X_train, y_train))
            fold_score = clf.score(X_test, y_test)
            print "Fold acc: %.4f" % fold_score
            scores.append(fold_score)
        print '%s label mean cv accuracy: %.4f\n' % (label_set, np.mean(scores))
示例12: train
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def train():
    """Fit an SGDClassifier incrementally over 100-row database batches.

    When the module-level `sampling` flag is truthy, training stops after
    the batch with index 10, giving a quick smoke-test run.

    NOTE(review): partial_fit is called without classes= on the first
    batch; confirm this works with the sklearn version in use.
    """
    model = SGDClassifier()
    for index, raw_batch in enumerate(db.mini_batches(100)):
        features, targets = vectorize_batch(raw_batch)
        model.partial_fit(features, targets)
        if sampling and index == 10:
            break
    return model
示例13: create_classifier
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def create_classifier(self):
    """Stream PCA'd vocabulary features from the DB and train a classifier.

    Pages the Vocabulary/PcaModel join in chunks of 1000 rows, maps string
    labels to indices via NameToIndex, partial-fits a modified-huber SGD
    model, then persists both the classifier and the label map as
    TrainingResult rows (replacing any previous ones).
    """
    DB.db.connect()
    clf = SGDClassifier( loss="modified_huber")
    labs_map = NameToIndex()
    with DB.db.transaction():
        offset = 0
        words_count = self.get_words_count()
        # One class index per known word.
        classes = numpy.arange(0,words_count)
        # Retain (up to ~10k) early samples for a final training-score check.
        x_all = []
        y_all = []
        while True:
            print ' %d partial_fit %d'%(time(),offset)
            query = DB.Vocabulary\
                .select(DB.Vocabulary.lv1, DB.Vocabulary.lv2)\
                .join(DB.PcaModel, on=(DB.Vocabulary.feature == DB.PcaModel.feature)).order_by( DB.Vocabulary.feature).offset(offset).limit(1000)\
                .tuples().iterator()
            # Each row becomes [label, feature_0, feature_1, ...].
            features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
            offset += len(features)
            if len(features) == 0:
                break
            Y = features[:,0]
            X = features[:,1:]
            labs = []
            for lab in Y:
                labs.append(labs_map.map(lab))
            if(len(x_all)<10000):
                x_all = x_all + X.tolist()
                y_all = y_all + labs
            labs = numpy.array(labs)
            #clf = LinearSVC()
            #clf = OneVsRestClassifier(SVC(probability=True, kernel='linear'))
            #clf.fit(X,labs)
            clf.partial_fit(X,labs,classes)
        # NOTE(review): scored on retained training samples, so this is a
        # training accuracy, not a held-out estimate.
        print clf.score(x_all,y_all)
        # Replace any previously stored model/label-map for this class.
        DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_clf").execute()
        DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_labs_map").execute()
        tr = DB.TrainingResult()
        tr.name = self.__class__.__name__+"_clf"
        tr.data = clf
        tr.save()
        tr = DB.TrainingResult()
        tr.name = self.__class__.__name__+"_labs_map"
        tr.data = labs_map
        tr.save()
示例14: chi_feature_select
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def chi_feature_select(train_file, test_file):
    """Train a TF-IDF + SGD text classifier, then demo partial_fit updates.

    Loads [label, text] lines from both files, fits a logistic-loss SGD
    model on TF-IDF features, prints a probability for the first test
    sample, then demonstrates incremental updating with two hard-coded
    example sentences.

    NOTE(review): despite the name, the chi-square feature-selection code
    is entirely commented out below.
    """
    lines = read_text_src(train_file)
    # Drop malformed rows that lack both a label and a text field.
    lines = [x for x in lines if len(x)>1]
    X_train = [line[1] for line in lines]
    y_train = [line[0] for line in lines]
    lines = read_text_src(test_file)
    lines = [x for x in lines if len(x) > 1]
    X_test = [line[1] for line in lines]
    y_test = [line[0] for line in lines]
    vectorizer = TfidfVectorizer(tokenizer=zh_tokenize)#ngram_range=(1,2)
    X_train = vectorizer.fit_transform(X_train)
    print X_train.shape
    X_test = vectorizer.transform(X_test)
    # word = vectorizer.get_feature_names()
    # N = X_train.shape[1]
    # ch2 = SelectKBest(chi2, k=int(N*0.2)) #.fit_transform(X, y)
    #
    #
    # X_train = ch2.fit_transform(X_train, y_train)
    # X_test = ch2.transform(X_test)
    # feature_names = [word[i] for i
    #                  in ch2.get_support(indices=True)]
    #
    # for i in feature_names:
    #     print i.encode('utf-8')
    # feature_names = np.asarray(feature_names)
    # print feature_names
    # clf = LinearSVC(penalty="l1", dual=False, tol=1e-3)
    # clf.fit(X_train, y_train)
    clf = SGDClassifier(loss="log", penalty='l1')
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    # Probability distribution for the first test sample only.
    prob = clf.predict_proba(X_test[0])
    print prob
    # Incremental update with two new labelled sentences; classes= must
    # be passed on the first partial_fit after fit.
    X=["市场经济复苏,互联网公司蓬勃发展","世纪大战终于开启,勇士引得第73胜"]
    Y=['1','0']
    X=vectorizer.transform(X)
    clf.partial_fit(X,Y, classes=['0','1'])
    tmpx=['暴风科技股价大跌',"世纪大战终于开启,勇士引得第73胜"]
    tmpX=vectorizer.transform(tmpx)
    pred = clf.predict(tmpX)
    print pred
示例15: main
# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import partial_fit [as 别名]
def main():
# Get training and model filenames
with open('model_metadata.json') as f:
config = json.load(f)
CLASSES = [float(x) for x in config['classes']]
model_filename = config['modelFilename']
NUM_BITS_FOR_HASHING = config['numBitsForHashing']
train_filename = config['trainFilename']
sklearn_version_expected = config['sklearnVersion']
# If sklearn version is wrong, exit without training
if float(sklearn.__version__) != float(sklearn_version_expected):
print "Wrong sklearn version"
sys.exit(0)
with open(train_filename) as f:
lines = (tuple(line.rstrip('\n').split('\t')) for line in f)
parsed_lines = ((line[1:], float(line[0])) for line in lines)
# Parse header and get feature names for namespacing
header = next(lines)
FEATURE_NAMES = tuple(header[1:])
# Build pipeline
pre_processing_pipeline = make_pre_processing_pipeline(
feature_names=FEATURE_NAMES,
num_bits_for_hashing=NUM_BITS_FOR_HASHING
)
# Instantiate classifier
# (a logistic regression model with Stochastic Gradient Descent)
clf = SGDClassifier(loss='log')
# Train model in mini-batches
batch_size = 8000
for rows, labels in batched_lines(batch_size, parsed_lines):
processed_rows = pre_processing_pipeline.fit_transform(rows)
clf.partial_fit(processed_rows, labels, classes=CLASSES)
print clf
# Save model
joblib.dump(clf, model_filename)
# Reload just to make sure it serializes and de- properly
joblib.load(model_filename)