This page collects typical usage examples of the Pipeline.predict method from Python's sklearn.pipeline module. If you are wondering what Pipeline.predict does, how to call it, or what real code that uses it looks like, the hand-picked examples below may help. You can also explore further usage examples of the containing class, sklearn.pipeline.Pipeline.
The following presents 15 code examples of Pipeline.predict, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
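Before the collected examples, here is a minimal, self-contained sketch of what Pipeline.predict does: the pipeline runs X through every fitted transformer step in order and then calls predict on the final estimator. The dataset and the two steps below are chosen purely for illustration and do not come from any of the examples on this page.

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

iris = load_iris()
pipe = Pipeline([
    ('scale', StandardScaler()),    # transformer step: applied to X inside predict()
    ('clf', LogisticRegression()),  # final estimator: its predict() produces the labels
])
pipe.fit(iris.data, iris.target)
print(pipe.predict(iris.data[:5]))  # class labels for the first five samples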
Example 1: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def main():
    Xraw, channels, marker = intialize_source()
    Xraw = EEGData(Xraw, channels)
    #Xraw.reject_channels(chnames=["O1","O2","P7","P8","T7","T8"])
    Xraw.avg_channel_pairs([('O1','O2'), ('F3','F4'), ('AF3','AF4'), ('P7','P8')])
    #View Filtered Raw Data
    X_p300 = Xraw.extract_epochs(Xmarker=marker, marker=1)
    X_nonp300 = Xraw.extract_epochs(Xmarker=marker, marker=2)
    X = np.concatenate((X_p300, X_nonp300))
    y = np.concatenate((np.ones(X_p300.shape[0]), np.zeros(X_nonp300.shape[0])))
    xy_tuples = zip(X, y)
    np.random.seed(15)
    np.random.shuffle(xy_tuples)
    acc_list = []
    kf = KFold(n=len(xy_tuples), n_folds=4, shuffle=False, random_state=None)
    for train_indices, test_indices in kf:
        X_train, y_train = (np.array(l) for l in zip(*[xy_tuples[i] for i in train_indices]))
        X_test, y_test = (np.array(l) for l in zip(*[xy_tuples[i] for i in test_indices]))
        eeg_clf = Pipeline([
            ('filter', ChebshevFilter(order=6)),
            ('featurecreation', ConcatEEGPreprocessor(downsample=False)),
            ('predictor', LogisticRegression())
        ])
        eeg_clf.fit(X_train, y_train)
        print confusion_matrix(y_train, eeg_clf.predict(X_train))
        print confusion_matrix(y_test, eeg_clf.predict(X_test))
        acc_list.append(accuracy_score(y_test, eeg_clf.predict(X_test)) * 100)
    print "Average Accuracy :" + str(np.mean(acc_list))
Example 2: run_classifier
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def run_classifier(classifier, cl_input, name):
    """This function is the generic function that runs any single sklearn classifier given it
    and produces a corresponding csv file"""
    # Create a pipeline to do feature transformation and then run those transformed features through a classifier
    pipeline = Pipeline([
        ('date_split', TimestampTransformer()),
        ('classifier', classifier)
    ])
    # Fit the classifier
    pipeline.fit(cl_input.train_data, cl_input.train_targets)
    # Make predictions on dev data
    dev_predictions = pipeline.predict(cl_input.dev_data)
    # print dev_predictions, dev_targets
    create_csv_submission(
        './' + name + '/dev_sub.csv', cl_input.dev_data, dev_predictions)
    # Make predictions based on the actual test data.
    predictions = pipeline.predict(cl_input.raw_eval)
    create_csv_submission(
        './' + name + '/eval_sub.csv', cl_input.raw_eval, predictions)
    # Return the Root Mean Square Logarithmic Error
    return RMSLE(cl_input.dev_targets, dev_predictions)
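TimestampTransformer and create_csv_submission above are project-specific helpers whose code is not shown here. Purely as a hedged illustration of the kind of transformer that could fill the 'date_split' slot (the class name, the 'datetime' column, and the behaviour are assumptions, not the project's actual code), a timestamp-splitting step might look like this:

# Hypothetical sketch of a timestamp-splitting transformer; the real
# TimestampTransformer used in run_classifier above is not shown in the source.
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class TimestampTransformerSketch(BaseEstimator, TransformerMixin):
    """Expand an assumed 'datetime' column into hour/weekday/month features."""

    def fit(self, X, y=None):
        return self  # stateless: nothing to learn

    def transform(self, X):
        X = X.copy()
        ts = pd.to_datetime(X['datetime'])
        X['hour'] = ts.dt.hour
        X['weekday'] = ts.dt.dayofweek
        X['month'] = ts.dt.month
        return X.drop('datetime', axis=1)

Because it exposes fit/transform, such a step can sit ahead of any final estimator, exactly as the 'date_split' step does in the pipeline above.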
Example 3: bestClassify
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def bestClassify(Xtrain, Ytrain, Xtest, Ytest):
    "Best classifier function"
    tfidf = True
    if tfidf:
        vec = TfidfVectorizer(preprocessor=identity,
                              tokenizer=identity)
    else:
        vec = CountVectorizer(preprocessor=identity,
                              tokenizer=identity)
    classifier = Pipeline([('vec', vec),
                           ('cls', MultinomialNB(alpha=0.23))])
    t0 = time.time()
    classifier.fit(Xtrain, Ytrain)
    train_time = time.time() - t0
    t1 = time.time()
    Yguess = classifier.predict(Xtest)
    test_time = time.time() - t1
    #print("Train time:", train_time)
    #print("Test time", test_time)
    return Yguess
Example 4: score_for_params
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def score_for_params(kw):
    '''
    | Get score from just one set of parameters
    | Takes in keyword arguments, including whether or not median filter will be included.
    '''
    # Turn the tuple into a packed dictionary to get all parameters
    params = _get_params(logistic_regression__C=kw)
    # Create the pipeline which consists of image
    # processing and a classifier
    # Note - can make this map to a dictionary of image processors instead of just median
    image_processors = [('hog', image_processing.HOG())]
    if params.pop('median'):
        image_processors.insert(0, ('median_smooth', image_processing.MedianSmooth()))
    else:
        params.pop('median_smooth__radius')
    classifier = ('logistic_regression', LogisticRegression())
    estimators = image_processors + [classifier]
    pipeline = Pipeline(estimators)
    # Create the grid search with list of parameters
    # to search. All values are now tuples
    pipeline.set_params(**params).fit(X_train, y_train)
    pipeline.predict(X_test)
    return pipeline.score(X_train, y_train), pipeline.score(X_test, y_test)
Example 5: unitSizeAnalysis
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def unitSizeAnalysis(data):
    units = range(3, 35) + [200, 300, 400, 500, 600, 700, 800, 900]
    validationSplit = 0.6
    split = int(math.floor(len(data['label']) * validationSplit))
    trainingSplit = {'train': data['train'][:split], 'label': data['label'][:split]}
    testingSplit = {'train': data['train'][split:], 'label': data['label'][split:]}
    for i in units:
        pipeline = Pipeline([
            ('min/max scaler', MinMaxScaler(feature_range=(0.0, 1.0))),
            ('neural network', Classifier(layers=[
                Layer("Rectifier", units=i),
                Layer("Gaussian", units=i),
                #Layer("Maxout", units=100, pieces=2),
                Layer("Softmax")],
                learning_rate=0.001,
                n_iter=25))])
        pipeline.fit(trainingSplit['train'], trainingSplit['label'])
        testAcc = accuracy_score(testingSplit['label'], pipeline.predict(testingSplit['train']))
        trainingAcc = accuracy_score(trainingSplit['label'], pipeline.predict(trainingSplit['train']))
        print str(i) + "," + str(testAcc) + "," + str(trainingAcc)
Example 6: test
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def test():
    target_label = [u'weather', u'audio', u'pic', u'calculate', u'music', u'poem']
    training_text_raw = []
    training_label = []
    with open('./training_source.csv', 'r') as f:
        for line in f.readlines():
            line = line.strip().split('\t')
            if len(line) > 1 and line[1] in target_label:
                training_text_raw.append(unicode(line[0], "utf-8"))
                training_label.append(line[1])
    print training_label
    training_text = []
    for text in training_text_raw:
        seg_text = seg(text)
        training_text.append(seg_text)
    text_clf = Pipeline([('vect', CountVectorizer()),
                         ('tfidf', TfidfTransformer(use_idf=False)),
                         ('clf', MultinomialNB()),
                         ])
    scores = cross_validation.cross_val_score(text_clf, training_text, training_label, cv=8)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    text_clf.fit(training_text, training_label)
    while True:
        k_text = raw_input("\nPlease input:")
        if k_text == "exit":
            break
        print text_clf.predict([seg(unicode(k_text, 'utf-8'))])
Example 7: test_one_rf
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def test_one_rf():
    Xtrain_raw, ytrain_raw = load_raw_data("sentidata_train_raw.pkl")
    print "training data loaded"
    print_label_frequency(ytrain_raw)
    ############# create the pipeline
    pipeline = Pipeline([
        ('vect', CountVectorizer(analyzer=lambda x: x, max_features=3000)),
        ('tfidf', TfidfTransformer()),
        ('rf', RandomForestClassifier(n_estimators=500,
                                      max_depth=200,
                                      min_samples_split=10,
                                      oob_score=True,
                                      n_jobs=-1, verbose=1, class_weight='balanced')),
    ])
    ############# train
    pipeline.fit(Xtrain_raw, ytrain_raw)
    ############# check result
    rf = pipeline.steps[-1][1]
    print rf.oob_score_
    ############# training error
    ytrain_predict = pipeline.predict(Xtrain_raw)
    print classification_report(y_true=ytrain_raw, y_pred=ytrain_predict)
    print confusion_matrix(y_true=ytrain_raw, y_pred=ytrain_predict)
    ############# testing error
    Xtest_raw, ytest_raw = load_raw_data("sentidata_test_raw.pkl")
    ytest_predict = pipeline.predict(Xtest_raw)
    print accuracy_score(y_true=ytest_raw, y_pred=ytest_predict)
    print classification_report(y_true=ytest_raw, y_pred=ytest_predict)
Example 8: fit_predict
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def fit_predict(self, time_budget):
    """ XX. """
    time_spent = 0
    ts = time.time()
    f = make_pipeline(VarianceThreshold(), SelectIndependent(), StandardScaler())
    X_train = f.fit_transform(self.data["X_train"])
    X_valid = f.transform(self.data['X_valid'])
    X_test = f.transform(self.data['X_test'])
    time_spent = time.time() - ts
    ts = time.time()
    time_budget -= time_spent
    # Benchmark cycle
    clf = Pipeline([
        ('filterF', GenericUnivariateSelect(f_classif, 'k_best', 'all')),
        ('clf', RandomForestClassifier(n_jobs=-1, n_estimators=self.benchmark_predictors))
    ])
    clf.fit(X_train, self.data["Y_train"])
    time_spent = time.time() - ts  # Compute remaining time
    ts = time.time()
    time_budget -= time_spent
    print "Cycle #0: {} estimators, {:.2f} sec".format(
        self.benchmark_predictors, time_spent)
    self.results.append({
        "Y_valid": clf.predict(f.transform(self.data['X_valid'])),
        "Y_test": clf.predict(f.transform(self.data['X_test']))
    })
    N = self.benchmark_predictors * \
        int(np.floor(time_budget / (self.K * self.cv * time_spent)))
    # Search the best filter param
    feat_num_clean = X_train.shape[1]
    k_range = np.unique(range(feat_num_clean, 1, -feat_num_clean / self.K))
    param_grid = [{'filterF__param': k_range}]
    clf = Pipeline([
        ('filterF', GenericUnivariateSelect(f_classif, 'k_best')),
        ('clf', RandomForestClassifier(n_jobs=-1, n_estimators=N))
    ])
    gs = GridSearchCV(
        clf, param_grid, cv=self.cv,
        scoring=make_scorer(bac_metric_wrapper))
    gs.fit(X_train, self.data["Y_train"])
    clf = gs.best_estimator_
    self.results.append({
        "Y_valid": clf.predict(X_valid),
        "Y_test": clf.predict(X_test)
    })
    print "Cycle #1: {} estimators, {:.2f} sec".format(N, time.time() - ts)
    print "Result: K={}, score={:.2f}".format(
        gs.best_params_['filterF__param'], gs.best_score_)
    return
Example 9: test_predict_with_predict_params
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def test_predict_with_predict_params():
    # tests that Pipeline passes predict_params to the final estimator
    # when predict is invoked
    pipe = Pipeline([('transf', Transf()), ('clf', DummyEstimatorParams())])
    pipe.fit(None, None)
    pipe.predict(X=None, got_attribute=True)
    assert_true(pipe.named_steps['clf'].got_attribute)
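Transf and DummyEstimatorParams are small helper classes from scikit-learn's own test suite and are not reproduced on this page. As a rough sketch of the idea (class bodies assumed, not copied; forwarding **predict_params through Pipeline.predict requires scikit-learn 0.20 or later), the final estimator simply records whatever keyword argument reaches its predict method:

# Hypothetical stand-ins for Transf / DummyEstimatorParams; the real helpers
# live in scikit-learn's test suite. Assumes a sklearn version whose
# Pipeline.predict forwards **predict_params to the final step (0.20+).
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

class TransfSketch(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X  # pass-through transformer

class DummyEstimatorParamsSketch(BaseEstimator):
    def fit(self, X, y=None):
        return self

    def predict(self, X, got_attribute=False):
        self.got_attribute = got_attribute  # record the forwarded keyword
        return X

pipe = Pipeline([('transf', TransfSketch()), ('clf', DummyEstimatorParamsSketch())])
pipe.fit(None, None)
pipe.predict(X=None, got_attribute=True)  # keyword reaches the final step's predict
assert pipe.named_steps['clf'].got_attribute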
Example 10: kfold_crossvalidation
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def kfold_crossvalidation(learner, k, n, examples, cv, labels):
    bestPip = 0
    bestErrV = 999999
    bestErrT = 999999
    bestAlph = 999999
    if k < 2:
        Error("Error n in Kfold must be >= 2")
    for size in range(1, n + 1):
        # Factory for building the pipeline with the distribution that models our network,
        # as well as the corresponding bag of words in each case
        if learner == "MultinomialNB CV":
            pipeline = Pipeline([
                ('vect', cv),
                ('clf', MultinomialNB(size, fit_prior=True, class_prior=None))])
        if learner == "MultinomialNB N":
            pipeline = Pipeline([
                ('vect', cv),
                ('tfidf', TfidfTransformer()),
                ('clf', MultinomialNB(size, fit_prior=True, class_prior=None))])
        if learner == "BernoulliNB":
            pipeline = Pipeline([
                ('vect', cv),
                ('clf', BernoulliNB(size, fit_prior=True, class_prior=None))])
        i = 1
        errV = 0
        errT = 0
        kf = KFold(len(examples), k)
        for train_index, validation_index in kf:
            # Extract the necessary words
            if i == 1 or i == k:
                train = examples[train_index[0]:train_index[-1]]
                train_labels = labels[train_index[0]:train_index[-1]]
                validation = examples[validation_index[0]:validation_index[-1]]
                validation_labels = labels[validation_index[0]:validation_index[-1]]
            else:
                train = examples[train_index[0]:validation_index[0]-1] + examples[validation_index[-1]+1:train_index[-1]]
                train_labels = labels[train_index[0]:validation_index[0]-1] + labels[validation_index[-1]+1:train_index[-1]]
                validation = examples[validation_index[0]:validation_index[-1]]
                validation_labels = labels[validation_index[0]:validation_index[-1]]
            i += 1
            # Train the model and compute the errors
            pipeline.fit(train, train_labels)
            predicted = pipeline.predict(validation)
            errV += metrics.f1_score(validation_labels, predicted)
            predicted = pipeline.predict(train)
            errT += metrics.f1_score(train_labels, predicted)
        # Compute the mean error
        errT = errT / k
        errV = errV / k
        if bestErrV > errV:
            bestErrV = errV
            bestErrT = errT
            bestPip = pipeline
            bestAlph = size
    return [bestPip, bestErrV, bestErrT, bestAlph]
Example 11: use
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def use(method):
    if method == 'naive bayes':
        estimators = [("skb", SelectKBest(score_func=f_classif)), ('pca', PCA()),
                      ('bayes', GaussianNB())]
        clf = Pipeline(estimators)
        parameters = {"skb__k": [8, 9, 10, 11, 12],
                      "pca__n_components": [2, 6, 4, 8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_params_
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train_scaled, labels_train)
        print "features score: "
        print SKB_k.scores_
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print features_selected
    elif method == 'svm':
        estimators = [('reduce_dim', PCA()), ('svc', SVC())]
        clf = Pipeline(estimators)
        parameters = {'svc__C': [1, 10]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_estimator_
    elif method == 'decision tree':
        estimators = [("skb", SelectKBest(score_func=f_classif)), ('pca', PCA()),
                      ('tree', tree.DecisionTreeClassifier())]
        clf = Pipeline(estimators)
        parameters = {"tree__min_samples_split": [2, 10], "skb__k": [8, 9, 10, 11, 12],
                      "pca__n_components": [2, 4, 6, 8]}
        clf = grid_search.GridSearchCV(clf, parameters)
        scaler = MinMaxScaler()
        features_train_scaled = scaler.fit_transform(features_train)
        features_test_scaled = scaler.transform(features_test)
        clf.fit(features_train_scaled, labels_train)
        pred = clf.predict(features_test_scaled)
        print clf.best_params_
        features_k = clf.best_params_['skb__k']
        SKB_k = SelectKBest(f_classif, k=features_k)
        SKB_k.fit_transform(features_train, labels_train)
        features_selected = [features_list[1:][i] for i in SKB_k.get_support(indices=True)]
        print features_selected
    accuracy = accuracy_score(labels_test, pred)
    print "accuracy score:"
    print accuracy
    calculate_precision_recall(pred, labels_test)
Example 12: run_naive_bayes
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def run_naive_bayes(use_tfidf):
    Xtrain_all, ytrain_all = tac.load_raw_data("sentidata_train_raw.pkl")
    tac.print_label_frequency("Train", ytrain_all)
    ############# create the pipeline
    if use_tfidf:
        pipeline = Pipeline([
            ('vect', CountVectorizer(analyzer=tac.do_nothing)),
            ('tfidf', TfidfTransformer()),
            ('nb', MultinomialNB())
        ])
    else:
        pipeline = Pipeline([
            ('vect', CountVectorizer(analyzer=tac.do_nothing)),
            ('nb', MultinomialNB())
        ])
    ############# search and fit
    # parameters = {
    #     'vect__max_features': (None,),
    #     # 'vect__max_features': (1000, 2000, 3000, 4000, 50000, None),
    # }
    # scoring_method = "roc_auc"
    # validate_split = tac.make_train_validate_split(len(ytrain_all))
    # searchcv = GridSearchCV(estimator=pipeline,
    #                         param_grid=parameters,
    #                         scoring=scoring_method,
    #                         n_jobs=-1,
    #                         verbose=1,
    #                         cv=validate_split)
    #
    # ############# search
    # print "#################### search cv begins"
    # searchcv.fit(Xtrain_all, ytrain_all)
    # print "#################### search cv ends"
    # print "best {}: {}".format(scoring_method, searchcv.best_score_)
    # print "best parameters: ", searchcv.best_params_
    #
    # ############# save
    # pipeline = searchcv.best_estimator_
    pipeline.fit(Xtrain_all, ytrain_all)
    common.simple_dump("sentimodel_nb.pkl", pipeline)
    ############# training error analysis
    ytrain_predict = pipeline.predict(Xtrain_all)
    tac.print_classification_report('Training Data', ytrain_all, ytrain_predict)
    ############# test error analysis
    Xtest, ytest = tac.load_raw_data("sentidata_test_raw.pkl")
    tac.print_label_frequency("Test", ytest)
    ytest_predict = pipeline.predict(Xtest)
    tac.print_classification_report('Testing Data', ytest, ytest_predict)
Example 13: LogisticBaseModel
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
class LogisticBaseModel(ClassifierBase):
    """Basic Logistic Classifier

    Parameters
    ----------
    :param pre_process_pipeline: sklearn pipeline steps
    """
    def __init__(self, pre_process_pipeline=None, **kwargs):
        if pre_process_pipeline is None:
            self._model = LogisticRegression(**kwargs)
        else:
            pre_process_pipeline.append(
                ('clf', LogisticRegression(**kwargs))
            )
            self._model = Pipeline(pre_process_pipeline)

    def fit(self, x, y):
        """Fit the logistic model

        :param x: feature matrix
        :param y: labels
        """
        self._model.fit(x, y)

    def predict(self, x):
        """Predict labels from the input features

        :param x: array of features
        :return: array of class predictions
        """
        return self._model.predict(x)

    def predict_proba(self, x):
        """Predict classes and corresponding probabilities for features x

        :param x: array of features
        :return: classes and probabilities array
        """
        return self._model.predict_proba(x)

    def save(self, path):
        """Save this model as pickle

        :param path: path to pickle model
        """
        joblib.dump(self._model, path)

    def load(self, path):
        """Load pickled model to this one

        :param path: path of already pickled model
        """
        self._model = joblib.load(path)
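Because __init__ appends a ('clf', LogisticRegression(**kwargs)) step to the list it receives, pre_process_pipeline is expected to be a list of (name, transformer) pairs rather than an already-built Pipeline. A hedged usage sketch follows; the scaler step and the toy data are illustrative assumptions, not part of the original project, and it presumes ClassifierBase and joblib are importable as in the original module.

# Illustrative usage only; StandardScaler and the random data are assumptions.
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.rand(20, 3)
y = np.random.randint(0, 2, size=20)

model = LogisticBaseModel(pre_process_pipeline=[('scale', StandardScaler())], C=1.0)
model.fit(X, y)
print(model.predict(X[:3]))        # class labels via Pipeline.predict
print(model.predict_proba(X[:3]))  # per-class probabilities
model.save('logistic_base_model.pkl')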
Example 14: optimize_char_ngram
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def optimize_char_ngram():
    log_reg_clf = LogisticRegression(C=10)
    #svm_clf = svm.SVC(C = 10)
    #classifiers = [log_reg_clf, svm_clf]
    tfidf_vect = TfidfVectorizer()
    estimators = [('vect', tfidf_vect), ('clf', log_reg_clf)]
    clf = Pipeline(estimators)
    clf.set_params(vect__analyzer='char')
    n_gram_range = np.array(range(1, 5))
    scores_train_vec = np.zeros(n_gram_range.shape)
    scores_test_vec = np.zeros(n_gram_range.shape)
    for n_gram_ind in range(len(n_gram_range)):
        print("%d n_gram:" % n_gram_range[n_gram_ind])
        clf.set_params(vect__ngram_range=(1, n_gram_range[n_gram_ind]))
        #cv = cross_validation.ShuffleSplit(X_train.shape[0], n_iter = 20, test_size = 0.2)
        cv = cross_validation.KFold(X_train.shape[0], n_folds=6)
        scores_train = []
        scores_test = []
        for train_index, test_index in cv:
            clf.fit(X_train[train_index], np.ravel(y_train[train_index]))
            y_train_pred = clf.predict(X_train[train_index])
            train_score = roc_auc_score_(y_train_pred, np.ravel(y_train[train_index]))
            print train_score
            scores_train.append(train_score)
            y_test_pred = clf.predict(X_train[test_index])
            test_score = roc_auc_score_(y_test_pred, np.ravel(y_train[test_index]))
            print test_score
            scores_test.append(test_score)
        scores_train_vec[n_gram_ind] = np.mean(scores_train)
        scores_test_vec[n_gram_ind] = np.mean(scores_test)
    print(scores_train_vec)
    print(scores_test_vec)
    plt.figure()
    plt.plot(n_gram_range, scores_train_vec)
    plt.hold(True)
    plt.plot(n_gram_range, scores_test_vec)
    plt.grid(True)
    plt.show()
Example 15: main
# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import predict [as alias]
def main(fst):
    input_number = float(fst.readline())
    x = []
    y = []
    for line in open('trainingdata.txt').readlines():
        xi, yi = line.split(',')
        x.append([float(xi)])
        y.append(float(yi))
    model = Pipeline([('poly', PolynomialFeatures(degree=7)), ('linear', LinearRegression())])
    model.fit(x, y)
    print model.predict([[input_number]])