本文整理汇总了 Python 中 sklearn.pipeline.Pipeline.score 方法的典型用法代码示例。如果您正苦于以下问题：Python Pipeline.score 方法的具体用法？Python Pipeline.score 怎么用？Python Pipeline.score 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.pipeline.Pipeline 的用法示例。
在下文中一共展示了Pipeline.score方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_pipeline_methods_preprocessing_svm
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def test_pipeline_methods_preprocessing_svm():
    """Check the prediction methods of a (preprocessing + SVC) pipeline.

    For each preprocessing step (standard scaling, then randomized PCA) a
    two-step pipeline is fitted on the iris data and the output shape of
    every prediction method is verified. ``score`` is only called; its
    value is not checked.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    n_samples = X.shape[0]
    n_classes = len(np.unique(y))

    preprocessors = [
        StandardScaler(),
        RandomizedPCA(n_components=2, whiten=True),
    ]
    # The same SVC instance is shared by (and refitted in) both pipelines.
    svc = SVC(probability=True, random_state=0)

    per_class_methods = (
        'predict_proba',
        'predict_log_proba',
        'decision_function',
    )
    for transformer in preprocessors:
        pipe = Pipeline([('preprocess', transformer), ('svc', svc)])
        pipe.fit(X, y)

        # predict yields one value per sample ...
        assert_equal(pipe.predict(X).shape, (n_samples,))
        # ... while the remaining methods yield one column per class.
        for method_name in per_class_methods:
            output = getattr(pipe, method_name)(X)
            assert_equal(output.shape, (n_samples, n_classes))

        pipe.score(X, y)
示例2: score_for_params
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def score_for_params(kw) :
    '''
    | Fit one pipeline configuration and return its train and test scores.
    | ``kw`` is handed to ``_get_params`` as ``logistic_regression__C``. The
    | resulting dictionary must hold a ``median`` entry, which decides whether
    | a median filter step is placed in front of the HOG step; when it is
    | falsy the dictionary must also hold ``median_smooth__radius``.
    | Returns the tuple (score on training data, score on test data).
    '''
    params = _get_params(logistic_regression__C=kw)

    # Image processing steps; the HOG step is always present.
    # Note - can make this map to a dictionary of image processors instead of just median
    hog_step = ('hog', image_processing.HOG())
    if params.pop('median'):
        steps = [('median_smooth', image_processing.MedianSmooth()), hog_step]
    else:
        # Without a median step its radius setting must not reach set_params.
        params.pop('median_smooth__radius')
        steps = [hog_step]
    steps.append(('logistic_regression', LogisticRegression()))

    pipeline = Pipeline(steps)
    pipeline.set_params(**params).fit(X_train, y_train)
    # NOTE(review): the predictions are discarded; this call looks redundant
    # with the score() calls below -- confirm before removing.
    pipeline.predict(X_test)

    train_score = pipeline.score(X_train, y_train)
    test_score = pipeline.score(X_test, y_test)
    return train_score, test_score
示例3: main
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def main(test_size=0.1, n_iter=10, learning_rate=0.01):
    """Train an RBM + logistic regression pipeline and print its test score.

    Args:
        test_size: forwarded to ``split_data`` as ``test_size``.
        n_iter: ``n_iter`` of the ``BernoulliRBM`` step.
        learning_rate: ``learning_rate`` of the ``BernoulliRBM`` step.

    Progress messages and the final score are written to stdout; nothing is
    returned.
    """
    n_components = 200  # number of neurons in hidden layer

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('... load and setup data')
    dataset = load_data()
    trainX, testX, trainY, testY = split_data(
        dataset, test_size=test_size, random_state=42)

    print('... building the model structure')
    # RBM feature extractor followed by a logistic regression classifier.
    rbm = BernoulliRBM(n_components=n_components, n_iter=n_iter,
                       learning_rate=learning_rate)
    logistic = LogisticRegression(C=1.0)

    print('... training the model')
    classifier = Pipeline([("rbm", rbm), ("logistic", logistic)])
    classifier.fit(trainX, trainY)

    print('... evaluate model')
    print(classifier.score(testX, testY))
示例4: crossvalidate
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def crossvalidate(classifier, data, subreddits):
    """Evaluate ``classifier`` on TF-IDF features and print a report.

    The data is split 80/20 (fixed ``random_state``). A TF-IDF + classifier
    pipeline is passed to ``evaluate_cross_validation`` with the training
    part and ``5``, then fitted on the training part. Train/test accuracy,
    the classification report and the confusion matrix are printed, and
    ``print_top10`` is finally called with the vectorizer, the classifier
    and the sorted distinct labels.

    Args:
        classifier: estimator used as the last pipeline step.
        data: documents handed to ``train_test_split`` as the samples.
        subreddits: labels, one per entry of ``data``.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(classifier)

    # split data into training and test sets
    test_fraction = 0.2
    x_train, x_test, y_train, y_test = train_test_split(
        data, subreddits, test_size=test_fraction, random_state=2)

    tfv = TfidfVectorizer(tokenizer=tokenize, stop_words='english')
    clf = Pipeline([('vect', tfv), ('clf', classifier)])

    evaluate_cross_validation(clf, x_train, y_train, 5)
    print('')
    print('')

    # train the clf classifier and assess its accuracy
    clf.fit(x_train, y_train)
    # Two spaces after '=' reproduce the separator that the former
    # ``print label, value`` statement inserted.
    print('accuracy on training set =  %s' % (clf.score(x_train, y_train),))
    print('accuracy on testing set =  %s' % (clf.score(x_test, y_test),))

    y_pred = clf.predict(x_test)
    print(metrics.classification_report(y_test, y_pred))
    print('confusion matrix')
    print(metrics.confusion_matrix(y_test, y_pred))
    print('')

    # sorted() accepts any iterable, so no intermediate list is needed.
    print_top10(tfv, classifier, sorted(set(subreddits)))
示例5: useTFIDF
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def useTFIDF():
    """Print 3-fold scores of a TF-IDF + k-NN pipeline on the review data.

    Reads ``data/multinomialTrain.csv``, and for every fold converts the
    train and test rows with ``transform_csv`` /
    ``transform_sklearn_dictionary`` (target column ``rating``), fits the
    pipeline on the train rows and prints its score on the test rows.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("TFIDF")
    trainData = pd.read_csv("data/multinomialTrain.csv", header=0)

    # Columns kept out of the feature set; shared by the train and the test
    # extraction so both are guaranteed to use the same list.
    ignored_columns = (
        "01v234", "2v34", "words", "words_nostopwords", "review",
        'numDet', 'innerPunctuation', 'avgWordLength', 'numPresVerb',
        "numFirstPerson", 'numPropNoun', "numOtherNoun", "numWords",
        "numAdj", "numPastVerb", "numConj", "exclamationPoints",
    )

    def build_xy(row_indices):
        # A fresh list per call, exactly as the two former literal lists.
        fold_rows = transform_csv(trainData.iloc[row_indices],
                                  target_col="rating",
                                  ignore_cols=list(ignored_columns))
        return transform_sklearn_dictionary(fold_rows)

    knn = KNeighborsClassifier(n_neighbors=21, weights='distance')
    tfidf = TfidfTransformer()
    # NOTE(review): despite its name this pipeline contains no scaler; the
    # StandardScaler that used to be created here was never added to it.
    tfidf_scaled_knn = Pipeline([('tfidf', tfidf), ('knn', knn)])

    kf = KFold(len(trainData), n_folds=3, shuffle=True)
    for train, test in kf:
        trainX, trainy = build_xy(train)
        testX, testy = build_xy(test)
        tfidf_scaled_knn.fit(trainX, trainy)
        print(tfidf_scaled_knn.score(testX, testy))
示例6: pipe_kf_validation
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def pipe_kf_validation(self):
    """Report hold-out and cross-validated accuracy of a scaled L1 model.

    Uses ``self.X_train`` / ``self.y_train`` for fitting and
    cross-validation and ``self.X_test`` / ``self.y_test`` for the hold-out
    score. All results are printed; nothing is returned.
    """
    scaler_step = ('scl', StandardScaler())
    classifier_step = (
        'clf', LogisticRegression(penalty='l1', random_state=1, C=0.1))
    pipe_lr = Pipeline([scaler_step, classifier_step])

    pipe_lr.fit(self.X_train, self.y_train)
    test_accuracy = pipe_lr.score(self.X_test, self.y_test)
    print('Test Accuracy: %.3f' % test_accuracy)

    # K-fold validation score computed by cross_val_score.
    scores = cross_val_score(estimator=pipe_lr, X=self.X_train,
                             y=self.y_train, cv=10, n_jobs=1)
    print('cv Accuracy scores: %s' % scores)
    print('cv Accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))

    # Stratified K-fold validation done by hand, one fit per fold.
    folds = StratifiedKFold(y=self.y_train, n_folds=10, random_state=1)
    scores = []
    for fold_number, (train_idx, test_idx) in enumerate(folds, 1):
        pipe_lr.fit(self.X_train[train_idx], self.y_train[train_idx])
        fold_score = pipe_lr.score(self.X_train[test_idx],
                                   self.y_train[test_idx])
        scores.append(fold_score)
        class_counts = np.bincount(self.y_train[train_idx])
        print('Fold: %s,Class dist:%s ,Acc: %.3f'
              % (fold_number, class_counts, fold_score))
    print('cv Accuracy: %.3f +/- %.3f' % (np.mean(scores), np.std(scores)))
示例7: extra_data
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def extra_data():
    """Record how extra training samples change the pipeline's scores.

    A PCA + scaling + bias + polynomial SVC pipeline is refitted on the full
    training set plus a growing prefix of (a) generated samples and
    (b) perturbed samples. For each prefix size the train score, test score
    and number of training examples are collected, and the two result lists
    are written as JSON to ``ignore/extra_data_graph.json``.
    """
    pipeline = Pipeline([
        ("pca", IncrementalPCA(n_components=500)),
        ("scale", StandardScaler()),
        ("bias", AddBiasTerm()),
        ("svc", SVC(kernel="poly", degree=best_degree, C=best_c,
                    gamma=best_gamma)),
    ])
    n = 50000          # number of additional samples available per source
    num_slices = 50    # the additional samples are added in this many steps
    n_train = Xtrain_full.shape[0]

    # Named generated_* so the local no longer shadows this function's name.
    generated_data, generated_targets = gd.generate_extra_data(n)
    perturbed_data, perturbed_targets = gd.perturb_modified_digits(
        Xtrain_full, Ytrain_full, n)

    def fit_and_score(added_data, added_targets, n_added, i):
        # Refit on the full training set plus the first n_added extra rows.
        Xp = np.vstack((Xtrain_full, added_data[0:n_added]))
        Yp = np.vstack((Ytrain_full, added_targets[0:n_added]))
        pipeline.fit(Xp, Yp)
        return {"train_score": pipeline.score(Xp, Yp),
                "test_score": pipeline.score(Xtest, Ytest),
                "num_examples": n_train + n_added,
                "i": i}

    scores_perturb = []
    scores_extra = []
    # i starts at -1 so the first iteration adds no samples (baseline).
    for i in range(-1, num_slices):
        print("Iteration " + str(i + 2))
        # Floor division keeps the slice bound an int under true division
        # (Python 3 / ``from __future__ import division``) and equals the
        # former ``/`` on Python 2 ints.
        n_added = (i + 1) * n // num_slices
        scores_extra.append(
            fit_and_score(generated_data, generated_targets, n_added, i))
        scores_perturb.append(
            fit_and_score(perturbed_data, perturbed_targets, n_added, i))

    d = {"scores_perturb": scores_perturb, "scores_extra": scores_extra}
    with open("ignore/extra_data_graph.json", "w") as f:
        json.dump(d, f)
示例8: run_model
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def run_model(self, alpha=0.0001, batch_size=200, learning_rate_init=0.001, power_t=0.5, max_iter=200, momentum=0.9, beta_1=0.9, beta_2=0.999, hidden_layer_sizes=(100,), do_plot=True):
    """Fit a scaled MLP classifier and return its train and test scores.

    All keyword arguments except ``do_plot`` are passed unchanged to
    ``MLPClassifier``. The model is fitted on ``self.x_train`` /
    ``self.y_train`` and scored on both the training and the test data;
    both scores are printed. When ``do_plot`` is true the fitted pipeline is
    handed to the private learning-curve plotting method.

    Returns:
        tuple: ``(train_score, test_score)``.
    """
    # The learner needs standardized data, hence the scaling step first.
    scaler = StandardScaler()
    mlp_settings = dict(
        alpha=alpha,
        batch_size=batch_size,
        learning_rate_init=learning_rate_init,
        power_t=power_t,
        max_iter=max_iter,
        momentum=momentum,
        beta_1=beta_1,
        beta_2=beta_2,
        hidden_layer_sizes=hidden_layer_sizes,
    )
    pipe_clf = Pipeline([('scl', scaler), ('clf', MLPClassifier(**mlp_settings))])

    pipe_clf.fit(self.x_train, self.y_train)

    # Accuracy on the data the model was fitted on, then on held-out data.
    train_score = pipe_clf.score(self.x_train, self.y_train)
    print('Training score is', train_score)
    test_score = pipe_clf.score(self.x_test, self.y_test)
    print('Test score is', test_score)

    scores = (train_score, test_score)
    if do_plot:
        self.__plot_learning_curve(pipe_clf)
    return scores
示例9: use_kfold_cross_validation
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def use_kfold_cross_validation(X_train, X_test, y_train, y_test):
    """Print hold-out, hand-rolled and library cross-validation accuracy.

    A scaling + PCA + logistic regression pipeline is fitted on the training
    data and scored on the test data. It is then refitted once per
    stratified fold of the training data, and finally evaluated with
    ``cross_val_score``. Every result is printed; nothing is returned.
    """
    steps = [
        ('scl', StandardScaler()),
        ('pca', PCA(n_components=2)),
        ('clf', LogisticRegression(random_state=1)),
    ]
    pipe_lr = Pipeline(steps)
    pipe_lr.fit(X_train, y_train)
    test_accuracy = pipe_lr.score(X_test, y_test)
    print("Test accuracy: %.3f\n" % test_accuracy)

    # Hand-rolled stratified cross-validation: one fit and score per fold.
    folds = StratifiedKFold(y=y_train, n_folds=10, random_state=1)
    fold_scores = []
    for fold_number, (train_idx, test_idx) in enumerate(folds, start=1):
        y_fold = y_train[train_idx]
        pipe_lr.fit(X_train[train_idx], y_fold)
        accuracy = pipe_lr.score(X_train[test_idx], y_train[test_idx])
        fold_scores.append(accuracy)
        fold_summary = (fold_number, np.bincount(y_fold), accuracy)
        print("Fold: %s, Class dist.: %s, Acc: %.3f" % fold_summary)

    custom_stats = (np.mean(fold_scores), np.std(fold_scores))
    print("\nCustom CV accuracy: %.3f +/- %.3f\n" % custom_stats)

    # The same evaluation delegated to cross_val_score.
    cv_scores = cross_val_score(estimator=pipe_lr, X=X_train, y=y_train, cv=10)
    print("cross_val_score CV accuracy scores: %s" % cv_scores)
    library_stats = (np.mean(cv_scores), np.std(cv_scores))
    print("cross_val_score CV accuracy: %.3f +/- %.3f" % library_stats)
示例10: allFeatureClassify
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def allFeatureClassify(cosine=False):
    """Print 3-fold scores of a scaled k-NN classifier on all features.

    Reads ``data/multinomialTrain.csv``, and for every fold converts the
    train and test rows with ``transform_csv`` /
    ``transform_sklearn_dictionary`` (target column ``rating``), fits the
    scaler + k-NN pipeline on the train rows and prints its score on the
    test rows.

    Args:
        cosine: when true, neighbours are ranked by cosine distance instead
            of the classifier's default metric.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("AllFeatureClassifier")
    if cosine:
        print("Cosine")
    trainData = pd.read_csv("data/multinomialTrain.csv", header=0)

    if cosine:
        # A k-NN metric must be a *distance* (smaller means closer) between
        # two samples. The previously passed pairwise.cosine_similarity is a
        # similarity and operates on whole matrices, so it ranked neighbours
        # the wrong way round. The built-in 'cosine' distance is used with
        # brute-force search, since tree-based search does not support it.
        knn = KNeighborsClassifier(n_neighbors=21, metric='cosine',
                                   algorithm='brute')
    else:
        knn = KNeighborsClassifier(n_neighbors=21)
    scaler = preprocessing.StandardScaler()
    scaled_knn = Pipeline([('scaler', scaler), ('knn', knn)])

    # Columns kept out of the feature set; shared by the train and the test
    # extraction so both are guaranteed to use the same list.
    ignored_columns = ("01v234", "2v34", "words", "words_nostopwords",
                       "review")

    def build_xy(row_indices):
        # A fresh list per call, exactly as the two former literal lists.
        fold_rows = transform_csv(trainData.iloc[row_indices],
                                  target_col="rating",
                                  ignore_cols=list(ignored_columns))
        return transform_sklearn_dictionary(fold_rows)

    kf = KFold(len(trainData), n_folds=3, shuffle=True)
    for train, test in kf:
        trainX, trainy = build_xy(train)
        testX, testy = build_xy(test)
        scaled_knn.fit(trainX, trainy)
        print(scaled_knn.score(testX, testy))
示例11: scikitClassifier
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def scikitClassifier(self):
    """Train a bag-of-words linear SVM and print several evaluation scores.

    The pipeline is first passed to ``self.scikitKFold``. It is then fitted
    on the first two thirds of ``self.features_X`` / ``self.features_Y``
    and scored on the remaining third, used to classify one sample
    sentence, and scored on the set from ``self.getUnknownTestSet()``.
    """
    classifier = Pipeline([
        ('vectorizer', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('classifier', svm.SVC(kernel='linear', cache_size=5000)),
    ])
    # Alternative classifiers with the figures noted next to them:
    #   OneVsRestClassifier(LinearSVC()) - 92.3
    #   MultinomialNB(alpha=1.0,class_prior=None,fit_prior=True) - 86.7
    #   generally sentiment analysis works better with BernoulliNB because
    #   of boolean nature
    #   BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True) - 84.5
    #   svm.SVC(kernel='linear', cache_size=4000) - 92.3
    self.scikitKFold(classifier)

    # Floor division keeps the split index an int under true division and
    # equals the former ``/`` on Python 2 ints.
    size = len(self.features_X) * 2 // 3
    X_train = np.array(self.features_X[:size])
    y_train = np.array(self.features_Y[:size])
    print(' feaxtureX ',X_train[1:4],' length ',len(X_train), ' y_train ',y_train[1:10], len(y_train) )
    # The test part starts right where the training part ends; the former
    # ``size + 1`` silently dropped the sample at index ``size``.
    X_test = np.array(self.features_X[size:])
    y_test = np.array(self.features_Y[size:])

    classifier = classifier.fit(X_train, y_train)
    # The extra space after each label reproduces the separator that the
    # former ``print label, value`` statements inserted.
    print("Scores @Scikit %s" % (classifier.score(X_test, y_test),))
    prediction = classifier.predict(['I definately didn\'t like the acting and it was too boring'])
    print('Lets Predict=>  %s' % (prediction,))

    XTEST, YTEST = self.getUnknownTestSet()
    print("KARMA  %s" % (classifier.score(XTEST, YTEST),))
示例12: test_pipeline_sample_weight_supported
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def test_pipeline_sample_weight_supported():
    """Check that ``Pipeline.score`` passes ``sample_weight`` on.

    The score is expected to be 3 when no sample weight (or ``None``) is
    given and 8 when the weights ``[2, 3]`` are supplied.
    """
    X = np.array([[1, 2]])
    pipe = Pipeline([('transf', Transf()), ('clf', FitParamT())])
    pipe.fit(X, y=None)

    # (keyword arguments for score, expected score), checked in order.
    cases = [
        ({}, 3),
        ({'y': None}, 3),
        ({'y': None, 'sample_weight': None}, 3),
        ({'sample_weight': np.array([2, 3])}, 8),
    ]
    for score_kwargs, expected in cases:
        assert_equal(pipe.score(X, **score_kwargs), expected)
示例13: with_support_vector_machines
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def with_support_vector_machines():
    """
    Fit three character n-gram + TF-IDF + linear SVC pipelines on the tag
    training sentences and print the accuracy of each one on the 500-sample
    test set.

    C :
    The C parameter tells the SVM optimization how much you want to avoid misclassifying each training example.
    For large values of C, the optimization will choose a smaller-margin hyperplane if that hyperplane does a better job
    of getting all the training points classified correctly. Conversely, a very small value of C will cause the optimizer
    to look for a larger-margin separating hyperplane, even if that hyperplane misclassifies more points. For very tiny values of C,
    you should get misclassified examples, often even if your training data is linearly separable.
    """
    # Parameter grid that was once explored with
    # GridSearchCV(pipeline, parameters, verbose=1):
    #   'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1,4)]
    #   'vect__analyzer': ['word', 'char', 'char_wb']
    #   'clf__kernel': ['linear', 'rbf',]
    #   'clf__gamma': [1e-3, 1e-4]
    #   'clf__C': [1, 10, 100, 1000]

    # (ngram_range, gamma) of each run, in the original order.
    # NOTE(review): with kernel="linear" gamma presumably has no effect, so
    # the first two runs should give the same result -- confirm.
    configurations = [
        ((2, 6), .001),
        ((2, 6), .01),
        ((1, 6), 0.0),
    ]
    for ngram_range, gamma in configurations:
        pipeline = Pipeline([
            ('vect', CountVectorizer(ngram_range=ngram_range,
                                     analyzer="char_wb")),
            ('tfidf', TfidfTransformer()),
            ('chi2', SelectKBest(chi2, k="all")),
            ('clf', SVC(C=1, kernel="linear", gamma=gamma)),
        ])
        pipeline.fit(TAGS_TRAINING_SENTENCES, TAG_TARGETS)
        accuracy = pipeline.score(TEST_SENTENCES_500, TEST_TARGET_500)
        print("Accuracy with 500 samples with svm search %.3f" % accuracy)
示例14: test_pipeline_init_tuple
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def test_pipeline_init_tuple():
    """Check that ``Pipeline`` accepts its steps as a tuple.

    The pipeline is fitted and scored twice: once as constructed and once
    after the ``transf`` step has been set to ``None``.
    """
    X = np.array([[1, 2]])
    steps = (('transf', Transf()), ('clf', FitParamT()))
    pipe = Pipeline(steps)

    for disable_transformer in (False, True):
        if disable_transformer:
            pipe.set_params(transf=None)
        pipe.fit(X, y=None)
        pipe.score(X)
示例15: main
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import score [as 别名]
def main(self):
    """Train and evaluate one classifier per radar feature directory.

    For each of ``radar1`` .. ``radar4`` the CSV files listed by
    ``get_file_list('features/<radar>/')`` are split by name: files matching
    ``p3`` form the test set, all others the training set. Both sets are
    saved as ``<radar>_train.csv`` / ``<radar>_test.csv`` in the same
    directory. A feature-selection + random forest pipeline is fitted on the
    training set, and the predictions, confusion matrix, score and
    precision/recall/F-score on the test set are printed.
    """
    dirname = 'features/'
    rlst = ['radar1', 'radar2', 'radar3', 'radar4']

    def load_stacked(paths):
        # Load every CSV and stack the rows with a single vstack instead of
        # re-copying the accumulated array for each file. Without any file
        # the result is the same empty array that served as placeholder.
        if not paths:
            return np.array([])
        return np.vstack([np.genfromtxt(path, delimiter=',')
                          for path in paths])

    for rname in rlst:
        radar_dir = dirname + rname + '/'
        filelist = get_file_list(radar_dir)
        trfile = radar_dir + rname + '_train.csv'
        tsfile = radar_dir + rname + '_test.csv'

        # NOTE(review): the two output files are written into the directory
        # that is scanned here; if get_file_list returns them on a later
        # run they would be loaded as training data -- confirm.
        test_paths = []
        train_paths = []
        for name in filelist:
            path = radar_dir + name
            if search('p3', name):
                test_paths.append(path)
            else:
                train_paths.append(path)
        test = load_stacked(test_paths)
        train = load_stacked(train_paths)

        np.savetxt(trfile, train, delimiter=",")
        np.savetxt(tsfile, test, delimiter=",")

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print(rname)
        print('______')

        # NOTE(review): using LinearSVC as a non-final pipeline step relies
        # on it offering transform() -- presumably an older scikit-learn;
        # confirm against the installed version.
        clf = Pipeline([
            ('feature_selection', LinearSVC(C=0.01, penalty="l1", dual=False)),
            ('classification', RandomForestClassifier())
        ])

        # NOTE(review): features are columns 0-24 and the label is column
        # 26, so column 25 is unused -- confirm this is intended.
        X = train[:, 0:25]
        y = train[:, 26]
        clf.fit(X, y)

        X_test = test[:, 0:25]
        y_test = test[:, 26]
        y_ = clf.predict(X_test)
        print(y_)

        cm = confusion_matrix(y_test, y_)
        print(cm)
        print(cm.shape)
        print(clf.score(X_test, y_test))
        print(precision_recall_fscore_support(y_test, y_))