当前位置: 首页>>代码示例>>Python>>正文


Python cross_validation.cross_val_score函数代码示例

本文整理汇总了Python中sklearn.cross_validation.cross_val_score函数的典型用法代码示例。如果您正苦于以下问题:Python cross_val_score函数的具体用法?Python cross_val_score怎么用?Python cross_val_score使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了cross_val_score函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getClfScore

def getClfScore(classifier, features, labels, cv):
    '''Cross-validate every classifier and collect its mean precision and recall.

    param:
        classifier : dict mapping a classifier name to an estimator object
        features   : feature data, forwarded to cross_val_score as X
        labels     : target values, forwarded to cross_val_score as y
        cv         : cross validation iterator, forwarded to cross_val_score
    return:
        test_score : dict of {classifier name: {'precision': mean score,
                                                'recall': mean score}}
    '''
    test_score = {}

    # Names are visited in sorted order so the evaluation order is stable.
    for clfname in sorted(classifier):
        clf = classifier[clfname]
        # scoring/cv are passed by keyword so the calls do not depend on the
        # positional order of cross_val_score's optional parameters.
        precision = cross_val_score(clf, features, labels,
                                    scoring='precision', cv=cv)
        recall = cross_val_score(clf, features, labels,
                                 scoring='recall', cv=cv)

        test_score[clfname] = {
            'precision': np.mean(precision),
            'recall': np.mean(recall),
        }
    return test_score
开发者ID:knopthakorn,项目名称:Data-Analyst,代码行数:25,代码来源:poi_id.py

示例2: test_cross_val_score_fit_params

def test_cross_val_score_fit_params():
    """Check that cross_val_score runs when given a ``fit_params`` dict.

    The dict carries one weight per sample and a uniform prior over the
    distinct values of ``y``. The test only passes or fails on whether the
    call raises; nothing is asserted about the returned scores here.
    """
    estimator = MockClassifier()
    sample_count = X.shape[0]
    class_count = len(np.unique(y))
    uniform_prior = np.ones(class_count) / class_count
    fit_params = dict(sample_weight=np.ones(sample_count),
                      class_prior=uniform_prior)
    cval.cross_val_score(estimator, X, y, fit_params=fit_params)
开发者ID:GGXH,项目名称:scikit-learn,代码行数:7,代码来源:test_cross_validation.py

示例3: cv

    def cv(self, parameters, scoring="roc_auc"):
        """Print the 3-fold cross-validation scores of ``self.estimator``.

        Args:
            parameters: Not used by this method.
            scoring: Scoring name forwarded to ``cross_val_score``.

        Returns:
            None. The array of per-fold scores is printed, not returned.
        """
        # Builtin ``float`` is exactly what the ``np.float`` alias pointed to;
        # the alias itself was deprecated and later removed from NumPy.
        X = self.data.values.astype(float)
        y = self.label.values

        print(cross_val_score(self.estimator, X, y, scoring=scoring, cv=3))
开发者ID:JFanZhao,项目名称:practice,代码行数:7,代码来源:adult.py

示例4: importData

def importData(datadirectory):
    """Load a labelled text corpus and print cross-validated scores of five pipelines.

    Args:
        datadirectory: Directory handed to ``load_files``; only the
            categories 'n' and 'y' are loaded.

    Returns:
        None. The training part of a 60/40 split is printed first, followed
        by one line per pipeline with its mean cross-validation score and a
        short label.
    """
    # A three-class variant ['n', 'u', 'y'] is disabled in the original source.
    categories = ['n', 'y']

    data = load_files(datadirectory, categories=categories, shuffle=True,
                      random_state=42, encoding='latin-1')
    # The split is only used for the diagnostic print below; every pipeline
    # is cross-validated on the full data set.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        data.data, data.target, test_size=0.4, random_state=0)
    print(X_train)

    # (label, pipeline) pairs, evaluated in this order. The array(...) notes
    # are the per-fold scores recorded next to each pipeline in the original.
    candidates = [
        # array([ 0.62376238,  0.57      ,  0.6122449 ])
        (' Tfidf NB',
         Pipeline([('vect', TfidfVectorizer()), ('clf', MultinomialNB())])),
        # array([ 0.56435644,  0.5       ,  0.57142857])
        (' CountVec NB',
         Pipeline([('vect', CountVectorizer()), ('clf', MultinomialNB())])),
        # array([ 0.55445545,  0.48      ,  0.54081633])
        (' CountVec SVM',
         Pipeline([('vect', CountVectorizer()), ('svm', LinearSVC())])),
        # array([ 0.62376238,  0.57      ,  0.6122449 ])
        (' Tfidf SVM',
         Pipeline([('vect', TfidfVectorizer()), ('svm', LinearSVC())])),
        (' SGDC',
         Pipeline([('vect', CountVectorizer()),
                   ('clf', linear_model.SGDClassifier())])),
    ]

    for label, pipeline in candidates:
        scores = cross_val_score(pipeline, data.data, data.target)
        # Average over the number of folds actually returned rather than a
        # hard-coded 3, so the figure stays a mean if the default fold count
        # differs.
        print(str(sum(scores) / float(len(scores))) + label)
开发者ID:krsreenatha,项目名称:IndyRef,代码行数:27,代码来源:model.py

示例5: crossValidation

def crossValidation():
    """Run 2-fold cross-validation for three classifiers on the '2010' data.

    The mean score of each classifier is printed to stdout and then the same
    report is written to the module-level file handle ``fh``, followed by a
    separator line and two blank lines.
    """
    data2010, labels2010 = read_tac('2010')

    # Report title paired with the classifier it describes.
    estimators = [
        ("Gaussian Naive Bayes: ", naive_bayes.GaussianNB()),
        ("Support Vector Machine: ", svm.SVC(kernel="linear")),
        ("Logistic Regression: ", linear_model.LogisticRegression()),
    ]

    results = [
        (title, cross_validation.cross_val_score(model, data2010, labels2010, cv=2))
        for title, model in estimators
    ]

    # Console report first ...
    print("Results:")
    for title, fold_scores in results:
        print(title)
        print(str(fold_scores.mean()))

    # ... then the identical report in the output file.
    fh.write("Results:" + "\n")
    for title, fold_scores in results:
        fh.write(title + "\n")
        fh.write(str(fold_scores.mean()) + "\n")
    fh.write("-------------------------------------------------\n")
    fh.write("\n\n")
开发者ID:daveguy,项目名称:Comp599,代码行数:29,代码来源:a1.py

示例6: dofitSVMstd

def _std_entropy_features(X, top_k=16):
    """Return one feature row per sample: the top_k largest stds followed by the top_k smallest entropies."""
    width = X.shape[2]
    rows = []
    for sample in X:
        # One std and one entropy value per index along the last axis.
        stds = sorted((np.std(sample[:, i]) for i in range(width)),
                      reverse=True)
        entropies = sorted(getEntropy(list(sample[:, i].astype(int)))
                           for i in range(width))
        rows.append(stds[:top_k] + entropies[:top_k])
    return np.array(rows)


def dofitSVMstd(X_train, Y_train, X_test):
    """Fit a random forest on std/entropy features and refit it with pseudo-labelled test data.

    Steps:
        1. Build features for ``X_train`` and fit the forest on them.
        2. Predict the training rows (``p1``).
        3. Build features for ``X_test``, predict them, and refit the forest
           on training rows plus test rows labelled with those predictions.
        4. Predict the test rows again with the refitted forest (``p2``).

    param:
        X_train : 3-D array, indexed as [sample, :, i] by the feature builder
        Y_train : labels for the rows of X_train
        X_test  : 3-D array with the same layout as X_train
    return:
        [scores, predictions] where ``scores`` is the cross_val_score result
        on the training features and ``predictions`` is ``p1`` followed by
        ``p2``.

    NOTE(review): despite the name, the estimator is a RandomForestClassifier.
    """
    x1 = _std_entropy_features(X_train)
    clf = RandomForestClassifier()
    clf.fit(x1, Y_train)
    p1 = clf.predict(x1)

    x2 = _std_entropy_features(X_test)
    y2 = clf.predict(x2)
    # Refit on the training rows plus the test rows with predicted labels.
    xx = np.concatenate((x1, x2))
    yy = np.concatenate((Y_train, y2))
    clf.fit(xx, yy)
    p2 = clf.predict(x2)

    # The original also ran this cross-validation right after the first fit
    # and then overwrote that result unused; only this call is kept.
    scores = cross_validation.cross_val_score(clf, x1, Y_train)
    return [scores, np.concatenate((p1, p2))]
开发者ID:rbroberg,项目名称:kaggle.com,代码行数:33,代码来源:seizure_detection_simple_40a.py

示例7: test_cross_val_score_filter_feature_selection_threshold

def test_cross_val_score_filter_feature_selection_threshold():
    """Compare thresholding inside the cross-validation loop with thresholding beforehand.

    For each threshold two 5-fold score arrays are produced with a linear
    SVC: one where ``cv.trim_X_threshold`` is applied inside
    ``cv.cross_val_score_filter_feature_selection`` on data loaded with
    ``threshold=None``, and one where the data is loaded with the threshold
    already applied and scored with plain ``cross_val_score``.
    """

    def scores_for(threshold):
        # Variant 1: load with threshold=None, filter inside the CV helper.
        scikit_data, scikit_target = dfm.get_expression_scikit_data_target(
            expression_file, ic50_file, normalized=True, trimmed=True, threshold=None)
        model = classify.construct_svc_model(kernel='linear')
        inside_cv = cv.cross_val_score_filter_feature_selection(
            model, cv.trim_X_threshold, threshold, scikit_data, scikit_target, cv=5)

        # Variant 2: load with the threshold applied, then plain CV.
        m = classify.construct_svc_model(kernel='linear')
        s_data, s_target = dfm.get_expression_scikit_data_target(
            expression_file, ic50_file, normalized=True, trimmed=True, threshold=threshold)
        before_cv = cross_val_score(m, s_data, s_target, cv=5)
        return inside_cv, before_cv

    # Per the original comments, a threshold of 1.0 is the "no thresholding" case.
    non_thresholded_test_1, non_thresholded_test_2 = scores_for(1.0)
    thresholded_test_1, thresholded_test_2 = scores_for(.05)

    # Without effective thresholding it should not matter where the filter
    # is applied, so both means are expected to agree.
    assert math.fabs(non_thresholded_test_1.mean() - non_thresholded_test_2.mean()) < .001

    # With thresholding, filtering inside the cross-validation is expected to
    # report the lower mean score (the original comment attributes this to
    # reduced cross-validation overfitting).
    assert thresholded_test_1.mean() - thresholded_test_2.mean() < 0
开发者ID:joewledger,项目名称:Cell-Line-Classification,代码行数:27,代码来源:Test_Cross_Val.py

示例8: validate_model

def validate_model(model, features, labels):
    """Print the mean 4-fold accuracy, precision, recall and F1 of ``model``.

    Each metric is obtained from its own ``cross_val_score`` run over
    ``features`` and ``labels``; nothing is returned.
    """
    metric_names = ('accuracy', 'precision', 'recall', 'f1')
    means = [
        cross_val_score(model, features, labels, scoring=metric, cv=4).mean()
        for metric in metric_names
    ]
    report = "\n(METRICS) Accuracy: {:.3f}   Precision: {:.3f}   Recall: {:.3f}   F1-Score: {:.3f}"
    print(report.format(*means))
开发者ID:nhtruong,项目名称:ud120-projects,代码行数:7,代码来源:poi_id.py

示例9: experiment_zero

def experiment_zero(data,company):
    """Build a feature matrix for ``company``, train a classifier and print its cross-validated score.

    The experiment settings are fixed inside the function. The last column
    of the feature matrix is used as the target and all preceding columns as
    features. Scores come from five shuffled splits with 20% of the rows
    held out in each.
    """
    # NOTE(review): the banner says "One" although this is experiment_zero;
    # confirm which is intended.
    print('___Experiment One___')

    # Experiment parameters
    finance_datatype = 0    # 2 = stock price change, 1 = percentage stock price change, 0 = only direction
    finance_n = 2           # integer >= 0: number of days of finance data to include
    sentiment_datatype = 1  # 1 = all sentiment features, 0 = total
    sentiment_n = 1         # integer >= 0: number of days of sentiment data to include
    day = 0                 # 1 = include day of the week, 0 = do not
    target = 0              # 1 = amount, 0 = direction
    volume = 0              # 1 = yes, 0 = no

    enabled_sources = finance_n + sentiment_n + day + volume
    if enabled_sources == 0:
        print('Insufficient parameters set')
        return

    # Data processing: both helpers take the same settings in the same order.
    settings = (finance_datatype, finance_n, sentiment_datatype, sentiment_n, day, target, volume)
    feature_vector_meaning(company, *settings)
    matrix = create_feature_matrix(company, data, *settings)
    last_col = len(matrix[0]) - 1
    train_x = matrix[:, 0:last_col]
    train_y = matrix[:, last_col]

    # Classifier training
    # NOTE(review): the scaler is fitted on every row before the splits are
    # drawn, so held-out rows also influence the scaling.
    train_x = preprocessing.StandardScaler().fit(train_x).transform(train_x)

    clf = direction_classifier(train_x, train_y)
    cv = cross_validation.ShuffleSplit(len(train_x), n_iter=5, test_size=0.2, random_state=0)
    print(' _ _ _Evaluation_ _ _')

    # target value -> (scoring name, report template)
    evaluation = {
        0: ('accuracy', "  Accuracy: %0.2f (+/- %0.2f)"),
        1: ('mean_squared_error', "  MSE: %0.2f (+/- %0.2f)"),
    }
    if target in evaluation:
        scoring, template = evaluation[target]
        scores = cross_validation.cross_val_score(clf, train_x, train_y, cv=cv, scoring=scoring)
        print(template % (scores.mean(), scores.std() * 2))
    print('=====================')
开发者ID:RichEverett,项目名称:DowJonesTwitter,代码行数:35,代码来源:model.py

示例10: run_model

def run_model(data):
    """Bucket the labels, then print cross-validation results for a tree and a forest.

    Args:
        data: Table whose last column is the raw label and whose other
            columns are the features; accessed positionally via ``.iloc``.

    Returns:
        None. Per-fold scores and their mean are printed for a single
        decision tree ("weak") and for a random forest ("strong"), both
        configured from the module-level MAX_DEPTH, N_TREES, N_JOBS and
        N_FOLDS settings.
    """
    # ``.iloc`` is the positional accessor; the ``.ix`` accessor used
    # originally has been removed from pandas.
    features = data.iloc[:, :-1]

    # more categories <--> less accuracy
    # Bucketing trick: raw labels are grouped into bins of width 5.
    labels = data.iloc[:, -1].map(lambda k: int(k / 5))
    print('num classes = {}\n'.format(len(set(labels))))

    # weak (base) classifier
    print('fitting weak classifier...')
    weak_clf = DecisionTreeClassifier(max_depth=MAX_DEPTH)

    weak_cv_results = cross_val_score(weak_clf, features, labels,
                                      cv=N_FOLDS)
    print('weak_cv_results = {}'.format(weak_cv_results))
    print('avg accuracy = {}\n'.format(weak_cv_results.mean()))

    # strong (ensemble) classifier
    print('fitting strong classifier...')
    strong_clf = RandomForestClassifier(
        max_depth=MAX_DEPTH,
        n_estimators=N_TREES,
        n_jobs=N_JOBS)

    strong_cv_results = cross_val_score(strong_clf, features, labels,
                                        cv=N_FOLDS)
    print('strong_cv_results = {}'.format(strong_cv_results))
    print('avg accuracy = {}'.format(strong_cv_results.mean()))
开发者ID:abbas91,项目名称:gads,代码行数:29,代码来源:abalone_forest.py

示例11: coeff_of_deterimination

def coeff_of_deterimination(classifier, X, y, K=10):
    """Print two cross-validated mean scores of ``classifier`` on ``X``/``y``.

    One estimate uses cross_val_score's default splitting, the other an
    explicit ``KFold(y.size, K)``. Both calls request a single job
    (``n_jobs=1``). No ``scoring`` is passed, so the reported number is
    whatever the estimator scores by default.
    NOTE(review): presumably R^2 for a regressor; confirm for the estimators
    passed in.
    """
    default_cv_mean = cross_val_score(classifier, X, y=y, n_jobs=1).mean()
    kfold_mean = cross_val_score(classifier, X, y=y, cv=KFold(y.size, K), n_jobs=1).mean()
    print("The %d-Folds est coeff. of determ. R2 = %s" % (K, kfold_mean))
    # Joined with a single space, matching what a two-item print emits.
    print(" ".join(("basic cross val ", str(default_cv_mean))))
开发者ID:abnarain,项目名称:malware_detection,代码行数:7,代码来源:ml.py

示例12: run_conventional_linkage

def run_conventional_linkage(x, y, n_comps, linker_model, verbose=0, k_folds=3):
  """Cross-validate ``linker_model`` on ``x``/``y`` with the r2 and mean_squared_error scorers.

  ``n_comps`` is accepted but not used here. The mean of each score array
  is printed.

  Returns:
    Tuple ``(r2 mean, r2 std, mse mean, mse std)`` over the ``k_folds`` folds.
  """
  print("---->Cross validating")
  shared = dict(cv=k_folds, verbose=verbose)
  r2_folds = cross_val_score(linker_model, x, y, scoring='r2', **shared)
  mse_folds = cross_val_score(linker_model, x, y, scoring='mean_squared_error', **shared)

  r2_mean = np.mean(r2_folds)
  mse_mean = np.mean(mse_folds)
  # Joined with a single space, matching what a two-item print emits.
  print(' '.join(('---->R2: ', str(r2_mean))))
  print(' '.join(('---->MSE: ', str(mse_mean))))
  return r2_mean, np.std(r2_folds), mse_mean, np.std(mse_folds)
开发者ID:Materials-Informatics-Class-Fall2015,项目名称:MIC-Ternary-Eutectic-Alloy,代码行数:7,代码来源:smart_pipeline.py

示例13: _run_classifier

def _run_classifier(X, Y, parent, child, max_depth):
    """Fit a decision tree on a train/test split and print a set of diagnostics.

    Args:
        X: Feature data.
        Y: Target values.
        parent: Forwarded as ``min_samples_split``.
        child: Forwarded as ``min_samples_leaf``.
        max_depth: Forwarded as ``max_depth``.

    Returns:
        The list returned by ``_calc_error_rate_conf_int`` for the test-set
        confusion matrix, with the number of test samples appended.

    Printed along the way: train and test scores, 10-fold cross-validation
    accuracy on each part of the split, feature importances, the
    classification report, the confusion matrix and the error-rate interval.
    """
    # About one third of the rows are held out; the split is reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.333, random_state=0)
    clf = tree.DecisionTreeClassifier(min_samples_split=parent, min_samples_leaf=child, max_depth=max_depth)
    clf = clf.fit(X_train, y_train)

    print('model score on train data data:')
    print(clf.score(X_train, y_train))
    print('ten fold cross-validation results on train data:')
    scores = cross_val_score(clf, X_train, y_train, cv=10)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    print('model score on test data')
    print(clf.score(X_test, y_test))
    print('ten fold cross-validation results on test data:')
    scores = cross_val_score(clf, X_test, y_test, cv=10)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    print('Gini Importance')
    print(clf.feature_importances_)

    # These two headers were bare string expressions in the original and so
    # never reached the output.
    print('Classification Report')
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))

    print('Confusion Matrix')
    # Computed once and reused for the printout and the interval.
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    # Assumes _calc_error_rate_conf_int has no side effects, so one call
    # serves both the print and the return value. TODO confirm.
    error_interval = _calc_error_rate_conf_int(cm)
    print(error_interval)
    return error_interval + [len(y_test)]
开发者ID:jugovich,项目名称:IS576,代码行数:29,代码来源:hw4.py

示例14: fit_from_prep

 def fit_from_prep(self, infile):
     """Fit the classifier from data previously saved to ``infile``.

     ``self._da.load_from_file`` supplies three values: the first two are
     used to fit ``self._cl``, the third is used to build a new ``VQ``
     stored in ``self._vq``. When ``self._verbose`` is set, the mean 3-fold
     cross-validation score is printed as well.
     """
     H, y, w = self._da.load_from_file(infile)
     self._vq = VQ(w, hist=w.shape[0])
     self._cl.fit(H, y)
     if not self._verbose:
         return
     fold_scores = cross_validation.cross_val_score(self._cl, H, y, cv=3)
     print(fold_scores.mean())
开发者ID:fpeder,项目名称:mscr,代码行数:7,代码来源:bovw.py

示例15: analytics

def analytics():
    """Print accuracy figures for the stored bayes and svm models on the test data set.

    Both models are loaded already fitted, so no fitting happens here. For
    each model, the fraction of test samples predicted correctly is printed,
    followed by the best single-fold score of a 10-fold cross-validation run
    on the test data.
    """
    # Loaded as in the original, although nothing below reads it.
    trainer_data = get_thing_from_file("training_dataset.txt")
    tester_data = get_thing_from_file("test_dataset.txt")

    # Order matters: bayes results are always printed before svm results.
    models = (
        get_thing_from_file("bayes_model.txt"),
        get_thing_from_file("svm_model.txt"),
    )

    samples = tester_data.data
    predictions = [model.predict(samples) for model in models]

    print("** ACCURACIES **")
    for predicted in predictions:
        print(numpy.mean(predicted == tester_data.target))

    print("** K-FOLD VALIDATION ACCURACY")

    fold_scores = [
        cross_validation.cross_val_score(model, tester_data.data,
                                         tester_data.target, cv=10)
        for model in models
    ]

    # The maximum over the folds is reported, not the mean.
    for scores in fold_scores:
        print(max(scores))
    print("**")
开发者ID:colinricardo28,项目名称:Peepl,代码行数:31,代码来源:classifiers.py


注:本文中的sklearn.cross_validation.cross_val_score函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。