本文整理汇总了Python中sklearn.cross_validation.cross_val_predict函数的典型用法代码示例。如果您正苦于以下问题:Python cross_val_predict函数的具体用法?Python cross_val_predict怎么用?Python cross_val_predict使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了cross_val_predict函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: perform_classifier_cross_validation
def perform_classifier_cross_validation(classifier, dtm_train, targets_train,
                                        dtm_test, targets_test):
    """Cross-validate ``classifier`` and print the resulting metrics.

    The training data is scored with ``f1_macro`` over a shuffled 3-fold
    ``KFold`` (``random_state=42``); the per-fold scores and their
    mean / 2*std are printed.  Cross-validated predictions are then computed
    with ``cv=3`` for the training set and, separately, for the test set, and
    each result is handed to ``print_classifier_metrics``.

    Returns:
        The ``classifier`` object that was passed in.
    """
    n_folds = 3
    metric = 'f1_macro'
    splitter = KFold(len(targets_train), n_folds=n_folds, shuffle=True,
                     random_state=42)
    fold_scores = cross_validation.cross_val_score(
        classifier, dtm_train, targets_train, cv=splitter, scoring=metric)

    print("Same classifier with cross validation:")
    print("Scores for folds" + "(" + str(n_folds) + "):" + str(fold_scores))
    print(metric + ": %0.2f (+/- %0.2f)"
          % (fold_scores.mean(), fold_scores.std() * 2))

    # Same predict-then-report step for the train set, then the test set.
    passes = (
        (dtm_train, targets_train, "train-with-cv"),
        (dtm_test, targets_test, "test-with-cv"),
    )
    for matrix, targets, label in passes:
        predicted = cross_validation.cross_val_predict(
            classifier, matrix, targets, cv=n_folds)
        print_classifier_metrics(targets, predicted, label)

    return classifier
示例2: apply_cross_validated_learning
def apply_cross_validated_learning(datasetname, X, y, resultsfolder, nfolds=5):
    """Run cross-validated predictions for a grid of SVM and NB models.

    A results directory named ``<datasetname>_nfolds-<nfolds>`` is ensured
    under ``resultsfolder`` and a score file is initialised inside it.  Every
    combination of four SVC kernels and four ``C`` values, followed by
    ``MultinomialNB`` and ``GaussianNB``, is evaluated with
    ``cross_val_predict`` and the predictions are passed to ``reportresults``.

    Args:
        datasetname: Label used to build the results directory name.
        X: Feature data handed to ``cross_val_predict``.
        y: Target data handed to ``cross_val_predict``.
        resultsfolder: Parent directory for the experiment output.
        nfolds: Value passed as ``cv`` to ``cross_val_predict``.
    """
    root = IOtools.ensure_dir(
        os.path.join(resultsfolder, datasetname + "_nfolds-" + str(nfolds)))
    score_csv = os.path.join(root, metaexperimentation.scorefilename + ".csv")
    metaexperimentation.initialize_score_file(score_csv)

    def evaluate(estimator, modelname):
        # One experiment: cross-validated predictions, then report them.
        experimentname = "nfolds-" + str(nfolds) + modelname
        predictions = cross_validation.cross_val_predict(
            estimator, X, y, cv=nfolds)
        reportresults(y, predictions, experimentname, root, score_csv)

    # SVM grid: every kernel with every C value.
    algorithm = "SVM"
    for kernel in ("linear", "rbf", "sigmoid", "poly"):
        for penalty in (1, 10, 100, 1000):
            tag = "_m-" + algorithm + "_k-" + kernel + "_C-" + str(penalty)
            evaluate(svm.SVC(kernel=kernel, C=penalty), tag)

    # Naive Bayes variants, tagged by their class name.
    for bayes in (naive_bayes.MultinomialNB(), naive_bayes.GaussianNB()):
        evaluate(bayes, "_m-" + bayes.__class__.__name__)
示例3: main
def main():
    """Train four classifiers from two text files and pickle each of them.

    Command-line options ``-x/--trainfile`` and ``-y/--targetfile`` name the
    feature file (read with ``np.loadtxt``) and the target file (read as
    strings with ``np.genfromtxt``).  Each model is fitted on the full data,
    its 2-fold cross-validated accuracy is printed, and once all four are done
    the fitted models are pickled to fixed file names in the working directory.
    """
    parser = argparse.ArgumentParser(description='Train an ML model')
    required = parser.add_argument_group('required options')
    required.add_argument('-x', '--trainfile', required=True,
                          help='File containing training data')
    required.add_argument('-y', '--targetfile', required=True,
                          help='File containing target data')
    args = parser.parse_args()

    X = np.loadtxt(args.trainfile)
    Y = np.genfromtxt(args.targetfile, dtype='str')
    assert len(X) == len(Y), "length mismatch between train and target data"

    def fit_and_report(estimator, headline):
        # Fit on everything, then print the 2-fold cross-validated accuracy.
        estimator.fit(X, Y)
        guesses = cross_validation.cross_val_predict(estimator, X, Y, cv=2)
        print(headline, metrics.accuracy_score(Y, guesses))
        return estimator

    logistic = fit_and_report(
        linear_model.LogisticRegression(penalty='l2', C=1e5,
                                        solver='newton-cg', tol=0.00001),
        "Prediction accuracy of logistic regression : ")
    support_vector = fit_and_report(
        svm.SVC(C=1e5, kernel='rbf'),
        "Prediction accuracy of SVM : ")
    bayes = fit_and_report(
        naive_bayes.BernoulliNB(alpha=1.9),
        "Prediction accuracy of naive bayes : ")
    decision_tree = fit_and_report(
        tree.DecisionTreeClassifier(criterion='entropy'),
        "Prediction accuracy of decision trees : ")

    # Persist the fitted models, one pickle file each.
    dumps = (
        ('bin_file_lr', logistic),
        ('bin_file_svm', support_vector),
        ('bin_file_bayes', bayes),
        ('bin_file_dtree', decision_tree),
    )
    for path, fitted in dumps:
        with open(path, "wb") as sink:
            pickle.dump(fitted, sink, pickle.HIGHEST_PROTOCOL)
示例4: test_cross_val_predict_sparse_prediction
def test_cross_val_predict_sparse_prediction():
    """cross_val_predict must agree for dense and CSR-sparse inputs."""
    dense_X, dense_y = make_multilabel_classification(
        n_classes=2,
        n_labels=1,
        allow_unlabeled=False,
        return_indicator=True,
        random_state=1,
    )
    estimator = OneVsRestClassifier(SVC(kernel="linear"))

    dense_preds = cval.cross_val_predict(estimator, dense_X, dense_y, cv=10)
    sparse_preds = cval.cross_val_predict(
        estimator, csr_matrix(dense_X), csr_matrix(dense_y), cv=10)

    # The sparse run returns a sparse result; densify it before comparing.
    assert_array_almost_equal(sparse_preds.toarray(), dense_preds)
示例5: test_cross_val_predict_pandas
def test_cross_val_predict_pandas():
    """cross_val_predict must hand the estimator the original container types.

    ``X`` and ``y`` come from module scope.  They are wrapped in
    ``MockDataFrame`` and, when pandas is importable, in ``DataFrame`` /
    ``Series``; a ``CheckingClassifier`` then verifies the types it receives.
    """
    type_pairs = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
    except ImportError:
        pass
    else:
        type_pairs.append((Series, DataFrame))

    for TargetType, InputFeatureType in type_pairs:
        def is_feature_container(obj):
            return isinstance(obj, InputFeatureType)

        def is_target_container(obj):
            return isinstance(obj, TargetType)

        # Features use the "frame" type, targets the "series" type.
        features, target = InputFeatureType(X), TargetType(y)
        checker = CheckingClassifier(check_X=is_feature_container,
                                     check_y=is_target_container)
        cval.cross_val_predict(checker, features, target)
示例6: predict_evaluate_models
def predict_evaluate_models(fn, ax=None, sel=None, goal="Referee", verbosity=0):
    """Cross-validate a fixed set of classifiers and collect their scores.

    Args:
        fn: Passed through to ``data_prepare`` as the data source.
        ax: Not used by this function.
        sel: Feature names passed to ``data_prepare``.  ``None`` (the default)
            means ``["Penalties_Conceeded", "Tries_Scored"]``; a fresh list is
            built per call so the default is never shared between calls.
        goal: Target name passed to ``data_prepare``.
        verbosity: ``> 0`` prints each model's scores; ``> 2`` additionally
            prints two baseline figures.

    Returns:
        A list of ``(model_key, [accuracy, precision, recall, f1])`` tuples,
        one per model key, in evaluation order.
    """
    if sel is None:
        sel = ["Penalties_Conceeded", "Tries_Scored"]

    class_weight = 'auto'
    X, y, _names = data_prepare(fn, sel=sel, goal=goal, verbosity=verbosity - 1)

    if verbosity > 2:
        y_shuffled = y.copy()
        np.random.shuffle(y_shuffled)
        # float() keeps this a true division even for integer labels on
        # Python 2, where int / int would floor.
        print("All zeros accuracy:", 1.0 - np.sum(y) / float(len(y)))
        print("y_shuffled f1_csore:", metrics.f1_score(y, y_shuffled))

    n_folds = 10
    cv = cross_validation.StratifiedKFold(y, n_folds=n_folds)

    results = []
    for sclf in ('svm', 'svmp', 'svmr', 'lgCV', 'gnb', 'rf', 'knc'):
        clf = get_clf(sclf, class_weight=class_weight)
        y_pred = cross_validation.cross_val_predict(clf, X, y, cv=cv)
        res = [
            metrics.accuracy_score(y, y_pred),
            metrics.precision_score(y, y_pred),
            metrics.recall_score(y, y_pred),
            metrics.f1_score(y, y_pred),
        ]
        if verbosity > 0:
            print(sclf, res)
        results.append((sclf, res))
    return results
示例7: training
def training(features, targets, feature_description,
             validation_features, model_flag):
    """Fit the selected model, dump validation predictions, report CV results.

    The model chosen by ``model_flag`` is fitted on ``features`` / ``targets``
    and used to predict ``validation_features``.  One line per prediction is
    written to ``validation_result_<model_name><feature_description>``, then a
    10-fold cross-validated confusion matrix is passed to
    ``output_confusion_matrix_tex``.

    :param features: training features passed to ``model.fit``
    :param targets: training targets passed to ``model.fit``
    :param feature_description: suffix used in the output names
    :param validation_features: features passed to ``model.predict``
    :param model_flag: key into ``model_name_dict`` and ``model_dict``
    """
    model_name = model_name_dict[model_flag]
    model = model_dict[model_flag]
    model.fit(features, targets)
    prediction = model.predict(validation_features)
    file_names = np.load('ZL_validation_file_names.npy')

    # Raw string so that ``\.`` reaches the regex engine as written.
    name_pattern = re.compile(r'(validation\.[0-9]+)')

    # ``with`` closes the file even if a lookup below raises.
    result_path = 'validation_result_' + model_name + feature_description
    with open(result_path, 'w') as validation_result:
        for i, label in enumerate(prediction):
            # format: validation_xxxxx type
            sample_id = name_pattern.findall(file_names[i])[0].replace('.', '_')
            validation_result.write(
                '%s %s\n' % (sample_id, type_array[int(label)]))

    prediction = cross_validation.cross_val_predict(
        model, features, targets, cv=10)
    cm = confusion_matrix(targets, prediction)
    output_confusion_matrix_tex(
        cm, model_name + '_' + feature_description)
示例8: kfCrossVal
def kfCrossVal(loansData):
    """Cross-validate a linear model of interest rate and plot the fit.

    ``Interest.Rate`` is regressed on ``Loan.Length`` and ``FICO.Score`` with
    10-fold cross-validated predictions.  MAE, MSE and R^2 are printed and a
    measured-vs-predicted scatter plot is shown.

    Args:
        loansData: pandas DataFrame containing the three columns named above.
    """
    # ``sklearn.cross_validation`` was superseded by ``model_selection``;
    # fall back to the old module only when the new one is unavailable.
    try:
        from sklearn.model_selection import cross_val_predict
    except ImportError:
        from sklearn.cross_validation import cross_val_predict
    from sklearn import linear_model
    import sklearn.metrics as met
    import matplotlib.pyplot as plt

    # Interest rate is the dependent variable; loan length and FICO score
    # are the predictors.
    lr = linear_model.LinearRegression()
    # ``.values`` replaces ``DataFrame.as_matrix``, removed in pandas 1.0.
    y = loansData[['Interest.Rate']].values
    x = loansData[['Loan.Length', 'FICO.Score']].values

    # Run the k-fold cross validation and store the results as an array
    predicted = cross_val_predict(lr, x, y, cv=10)

    # Try and run as quadratic?

    # Calculate MAE, MSE, and R2
    print("Mean Absolute Error: {}".format(met.mean_absolute_error(y, predicted)))
    print("Mean Squared Error: {}".format(met.mean_squared_error(y, predicted)))
    print("R Squared: {}".format(met.r2_score(y, predicted)))

    # Plot the actual versus predicted values, with the y = x diagonal
    fig, ax = plt.subplots()
    ax.scatter(y, predicted)
    ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    plt.show()
示例9: main
def main():
    """Cross-validate the page classifier, show word weights, and pickle it.

    Prints the confusion matrix and classification report for 10-fold
    ``LabelKFold`` predictions grouped by page label.  A classifier is then
    trained on all but the last 20 samples, its ten lowest and ten highest
    word weights are printed, and it is written to ``classifier.pickle``.
    """
    import pickle

    dataset = samples.get_dataset()
    X, y, page_labels = build_Xy_from_pages_dataset(dataset)
    clf = create_classifier()

    # this gives the prediction result for every element
    # when it was in the test dataset during cross validation
    cv_iter = cross_validation.LabelKFold(page_labels, n_folds=10)
    predicted = cross_validation.cross_val_predict(clf, X, y, cv=cv_iter)
    cm = metrics.confusion_matrix(y, predicted)
    print('\nConfusion matrix:')
    print(cm, '\n\n')
    print(metrics.classification_report(y, predicted))

    print('Training and peeking at the word weights...')
    X_train, y_train = X[:-20], y[:-20]
    clf = get_trained_classifier(X_train, y_train)
    vectorizer = clf.steps[-2][1]
    svc = clf.steps[-1][1]

    # Assumes ``vocabulary_`` is the scikit-learn term -> column-index
    # mapping: order the words by column so each one lines up with its
    # coefficient instead of relying on dict iteration order.
    vocabulary = vectorizer.vocabulary_
    words_by_column = sorted(vocabulary, key=vocabulary.get)

    # Sort once into a list; a bare ``zip`` is a one-shot iterator on
    # Python 3 and would be empty for the second listing.
    word_weights = sorted(zip(svc.coef_[0], words_by_column))

    print('Top 10 weights for negative cases')
    for weight, word in word_weights[:10]:
        print('%0.5f %s' % (weight, word))
    print('\nTop 10 weights for positive cases')
    for weight, word in word_weights[-10:][::-1]:
        print('%0.5f %s' % (weight, word))

    # pickle writes bytes, so the file must be opened in binary mode.
    with open('classifier.pickle', 'wb') as f:
        pickle.dump(clf, f)
示例10: main
def main():
    """Build one-hot vectors, fit a one-vs-rest linear SVM, and report metrics.

    The data tuples are shuffled and converted to labelled one-hot vectors
    over a reduced feature space augmented with the module-level
    ``emotionFeatures``.  The classifier is fitted on all vectors, then
    10-fold cross-validated predictions are scored and printed.

    Output uses ``print()`` calls, so the function also parses on Python 3.
    """
    dataTuples = getDataInFormat()
    print("Length of dataTuples is: ", len(dataTuples))
    shuffle(dataTuples)
    ids, labels, vectors = getLabelsAndVectors(dataTuples)
    # Drop the raw tuples now that they have been unpacked.
    del dataTuples

    followerCountsList = loadFollowerCountsFromFile()
    space = getSpace(vectors)
    reducedSpace = getReducedSpace(vectors, space)
    spaceWithMetaFeatures = augmentSpace(reducedSpace, emotionFeatures)
    print("Total # of features in your space is: ", len(space))
    print("Total # of features in your reducedSpace is: ", len(reducedSpace))

    oneHotVectors = getOneHotVectors(ids, labels, vectors,
                                     spaceWithMetaFeatures, followerCountsList)
    trainVectors, trainLabels = getOneHotVectorsAndLabels(oneHotVectors)
    del oneHotVectors

    clf = OneVsRestClassifier(SVC(C=1, kernel='linear', gamma=0.1,
                                  verbose=False, probability=False))
    clf.fit(trainVectors, trainLabels)
    print("\nDone fitting classifier on training data...\n")

    print("=" * 50, "\n")
    print("Results with 10-fold cross validation:\n")
    print("=" * 50, "\n")
    predicted = cross_validation.cross_val_predict(
        clf, trainVectors, trainLabels, cv=10)

    print("*" * 20)
    print("\t accuracy_score\t", metrics.accuracy_score(trainLabels, predicted))
    print("*" * 20)
    print("precision_score\t", metrics.precision_score(trainLabels, predicted))
    print("recall_score\t", metrics.recall_score(trainLabels, predicted))
    print("\nclassification_report:\n\n",
          metrics.classification_report(trainLabels, predicted))
    print("\nconfusion_matrix:\n\n",
          metrics.confusion_matrix(trainLabels, predicted))
示例11: transform
def transform(self, X):
    """Append this estimator's predictions to ``X`` as an extra feature.

    When ``self.skip`` is set, ``X`` is returned untouched.  Otherwise the
    bytes of ``X`` are hashed and compared with ``self.hashed_value``: on a
    match the feature comes from ``cross_val_predict`` on a clone of
    ``self.model`` with ``self.y``; on a mismatch it comes from
    ``self.model.predict``.

    A DataFrame receives the feature in place as column
    ``self.feature_name``; an ndarray is returned as a new array with the
    feature appended as the last column.  Other input types are returned
    without the feature added.
    """
    # Purpose of skip is to bypass the estimator entirely.
    if self.skip:
        return X

    # Pick the raw buffer whose bytes identify the data set.
    if isinstance(X, pd.DataFrame):
        raw_buffer = X.values.data
    elif isinstance(X, np.ndarray):
        raw_buffer = X.data
    else:
        raw_buffer = None

    same_as_training = (
        raw_buffer is not None
        and self.hashed_value == hash(raw_buffer.tobytes())
    )

    # Training data gets cross-validated predictions; anything else is
    # predicted directly by the model.
    feature = (
        cross_val_predict(clone(self.model), X, self.y)
        if same_as_training
        else self.model.predict(X)
    )

    if isinstance(X, pd.DataFrame):
        X[self.feature_name] = feature
    elif isinstance(X, np.ndarray):
        X = np.c_[X, feature]
    return X
示例12: classify_cv
def classify_cv(data, cats, k):
    """Print a cross-validated classification report for TF-IDF + SVC.

    Args:
        data: Documents passed to ``TfidfVectorizer.fit_transform``.
        cats: Target labels, one per document.
        k: Value passed as ``cv`` to ``cross_val_predict``.
    """
    vectorizer = TfidfVectorizer(analyzer='word', stop_words=stopwords)
    features = vectorizer.fit_transform(data)
    classifier = svm.SVC(gamma=0.001, C=100.)

    guesses = cross_validation.cross_val_predict(
        classifier, features, cats, cv=k)

    # The confusion matrix is computed but neither printed nor returned.
    conf_matrix = metrics.confusion_matrix(cats, guesses)
    print(metrics.classification_report(cats, guesses))
示例13: checkSkflowAccuracy
def checkSkflowAccuracy(dataset, target):
    """Print 5-fold cross-validated accuracy and confusion matrix.

    A random forest is evaluated with ``cross_val_predict``; the confusion
    matrix is laid out for the labels 0 through 5.
    """
    # baseline: 0.6923 with max_feat=0.5
    forest = RandomForestClassifier(
        n_estimators=500,
        max_depth=8,
        max_features=0.9,
        n_jobs=8,
        random_state=1,
    )
    cv_predictions = cross_validation.cross_val_predict(
        forest, dataset, target, cv=5)

    print("Accuracy: " + str(metrics.accuracy_score(target, cv_predictions)))
    print(metrics.confusion_matrix(
        target, cv_predictions, labels=[0, 1, 2, 3, 4, 5]))
示例14: run
def run(params):
    """Load and preprocess the training data, then evaluate the classifier.

    When ``params['cross_validate']`` is truthy and the model is not an
    ``XGBClassifier``, 5-fold cross-validated predictions are scored with
    ``accuracyChecker`` and the function returns.  For an ``XGBClassifier``,
    ``xgboostCV`` is run first and the model is then scored on a train/test
    split.

    Args:
        params: Mapping with at least ``'test'`` and ``'target'`` entries;
            ``'cross_validate'`` is optional and treated as ``False`` when
            absent.

    Raises:
        Exception: ``'Target not specified'`` when splitting off the target
            column fails.
    """
    train = loadDataFrame(params, 'train')
    if params['test']:
        # Loaded here but not used further in this function.
        test = loadDataFrame(params, 'test')
    train = runPreprocess(train, params)
    clf = getSpecifiedClf(params)

    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not turned into this message.
    try:
        dataset, target = splitDatasetTarget(train, params['target'])
    except Exception:
        raise Exception('Target not specified')

    # Only a missing key means "no cross validation requested".
    try:
        cross_val = params['cross_validate']
    except KeyError:
        cross_val = False

    clfName = getNameFromModel(clf)
    if cross_val and clfName != 'XGBClassifier':
        print('Beginning cross validation')
        predicted = cross_validation.cross_val_predict(
            clf, dataset, target, cv=5, n_jobs=-1)
        accuracyChecker(target, predicted)
        return

    # NOTE(review): the listing this came from had lost its indentation.  The
    # split evaluation is kept inside the XGBoost branch because its message
    # names Xgboost - confirm against the original source.
    if clfName == 'XGBClassifier':
        print('Xgboost CV selected. Beginning to find optimal rounds')
        clf = xgboostCV(clf, dataset, target)
        print('Xgboost Accuracy on 80-20 split (for speed)')
        trainX, testX, trainY, testY = splitTrainTest(dataset, target)
        clf.fit(trainX, trainY)
        predicted = clf.predict(testX)
        accuracyChecker(testY, predicted)
示例15: get_testing_metrics
def get_testing_metrics(model, X, y, metrics, as_indexes, n_folds, X_test=None):
    """Compute cross-validated metrics plus optional extras for ``model``.

    Predictions come from ``cross_val_predict`` over a shuffled
    ``StratifiedKFold`` seeded with ``RANDOM_STATE``.  The model is then
    fitted on all of ``X`` / ``y`` so the optional entries can be read from
    the fitted model.

    Args:
        model: Estimator to evaluate; ``get_support`` is called on it when
            ``FEATURES`` is requested.
        X: Feature data.
        y: Target data.
        metrics: Collection of metric keys; also passed to
            ``get_y_true_y_pred_based_metrics``.
        as_indexes: When true, object names are looked up through
            ``get_data_keeper()`` from the single value in each row of ``X``;
            otherwise ``X.index`` is used.
        n_folds: Number of folds for ``StratifiedKFold``.
        X_test: Data passed to ``model.predict`` when ``TEST_PREDICTIONS`` is
            requested.

    Returns:
        The result of ``get_y_true_y_pred_based_metrics`` with the requested
        ``FEATURES``, ``OBJECTS`` and ``TEST_PREDICTIONS`` entries added.
    """
    folds = StratifiedKFold(
        y,
        n_folds=n_folds,
        shuffle=True,
        random_state=RANDOM_STATE
    )
    y_pred = cross_val_predict(model, X, y, cv=folds)
    # print() call instead of the Python 2 print statement.
    print("y_pred", y_pred)

    model.fit(X, y)
    result = get_y_true_y_pred_based_metrics(y, y_pred, metrics)

    if FEATURES in metrics:
        result[FEATURES] = model.get_support(indices=True)
    if OBJECTS in metrics:
        if as_indexes:
            keeper = get_data_keeper()
            result[OBJECTS] = [
                keeper.get_object_name_by_index(index) for (index,) in X
            ]
        else:
            result[OBJECTS] = list(X.index)
    if TEST_PREDICTIONS in metrics:
        result[TEST_PREDICTIONS] = X_test, model.predict(X_test)
    return result