当前位置: 首页>>代码示例>>Python>>正文


Python tester.test_classifier函数代码示例

本文整理汇总了Python中tester.test_classifier函数的典型用法代码示例。如果您正苦于以下问题：Python test_classifier函数的具体用法？Python test_classifier怎么用？Python test_classifier使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了test_classifier函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: tune_classifier

def tune_classifier(classifier, clf_params, max_features):
    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    features_list = get_feature_list()

    ### Create new feature(s)
    ### Store to my_dataset for easy export below.
    my_dataset = get_data()

    ### Extract features and labels from dataset for local testing
    features_list = features_list[0:max_features+1]
    data, labels, features = get_features_and_labels(my_dataset, features_list)

    ### Tune your classifier to achieve better than .3 precision and recall
    ### using our testing script. Check the tester.py script in the final project
    ### folder for details on the evaluation method, especially the test_classifier
    ### function. Because of the small size of the dataset, the script uses
    ### stratified shuffle split cross validation. For more info:
    ### http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.StratifiedShuffleSplit.html

    from sklearn.cross_validation import train_test_split
    features_train, features_test, labels_train, labels_test = \
        train_test_split(features, labels, test_size=0.3, random_state=42)

    # Testing
    clf = GridSearchCV(classifier, param_grid=clf_params, scoring=make_scorer(f1_score))
    clf.fit(features_train, labels_train)
    clf_final = clf.best_estimator_
    print "The best estimator = ", clf_final
    test_classifier(clf_final, my_dataset, features_list, 1000)
开发者ID:tommysiu,项目名称:udacity-data-analyst,代码行数:30,代码来源:tuning.py

示例2: RandomForest

def RandomForest(feature_list,dataset):
    """Evaluate a default RandomForestClassifier and report importances.

    The classifier is passed to ``test_classifier`` with ``dataset`` and
    ``feature_list``; afterwards its ``feature_importances_`` are handed to
    ``print_importance``.  Returns the classifier instance.
    """
    from sklearn.ensemble import RandomForestClassifier

    forest = RandomForestClassifier()
    test_classifier(forest, dataset, feature_list)
    # Importances are read after test_classifier has run on the estimator.
    print_importance(feature_list, forest.feature_importances_)
    return forest
开发者ID:MengoDB,项目名称:Identify-Fraud-from-Enron-Emails,代码行数:7,代码来源:poi_id.py

示例3: GaussianNB

def GaussianNB(feature_list, dataset):
    """Evaluate a default Gaussian naive Bayes classifier with tester.py.

    Builds the sklearn estimator, passes it to ``test_classifier`` with
    ``dataset`` and ``feature_list``, and returns the estimator.
    """
    # Aliased on import because this function shares the sklearn class name.
    from sklearn.naive_bayes import GaussianNB as NaiveBayes

    model = NaiveBayes()
    test_classifier(model, dataset, feature_list)
    return model
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:7,代码来源:poi_bkp.py

示例4: decisionTree

def decisionTree(feature_list, dataset):
    from sklearn import tree

    clf = tree.DecisionTreeClassifier()
    test_classifier(clf, dataset, feature_list)
    print clf.feature_importances_
    return clf
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:7,代码来源:poi_bkp.py

示例5: iterPipe

def iterPipe(num1, num2):
    """Evaluate a decision tree for each ``min_samples_split`` in a range.

    For every integer from ``num1`` through ``num2`` (inclusive) a ``DTC``
    with that ``min_samples_split`` is passed to ``test_classifier`` along
    with the module-level ``my_dataset`` and ``features_list``.
    """
    # Earlier experiments wrapped the tree in a Pipeline with StandardScaler
    # and/or PCA steps; only the bare tree is evaluated now.
    for min_split in range(num1, num2 + 1):
        candidate = DTC(min_samples_split=min_split)
        test_classifier(candidate, my_dataset, features_list)
开发者ID:Faylfire,项目名称:identifying_enron_fraud_project_5_fang_lu,代码行数:8,代码来源:poi_id.py

示例6: KNN

def KNN(feature_list,dataset):
    """Evaluate a scaled k-nearest-neighbours pipeline with tester.py.

    feature_list -- feature names handed to ``test_classifier``.
    dataset      -- dataset handed to ``test_classifier``.

    The pipeline standardises the features before a default
    ``KNeighborsClassifier``.  Nothing is returned.
    """
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # Scale the features before the distance-based classifier step.
    estimators = [('scale', StandardScaler()), ('knn', KNeighborsClassifier())]
    clf = Pipeline(estimators)
    # Use the arguments: the original ignored them and read the module-level
    # ``my_dataset`` / ``features_list`` instead.
    test_classifier(clf, dataset, feature_list)
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:9,代码来源:poi_id.py

示例7: setup_and_test

def setup_and_test(my_dataset, features_list, classifier):
    """Dump the classifier and data, reload them, and run tester.py on them.

    The reloaded objects, not the arguments, are what ``test_classifier``
    receives, so the dump/load round trip is part of what gets tested.
    Returns None.
    """
    # Persist the classifier, dataset and feature list for the tester.
    dump_classifier_and_data(classifier, my_dataset, features_list)

    # Load them back and evaluate the reloaded copies.
    loaded_clf, loaded_data, loaded_features = load_classifier_and_data()
    test_classifier(loaded_clf, loaded_data, loaded_features)
开发者ID:joashxu,项目名称:enron,代码行数:10,代码来源:utils.py

示例8: tuneKmeans

def tuneKmeans(feature_list,dataset):
    from sklearn.cluster import KMeans
    from sklearn.grid_search import GridSearchCV
    km_clf = KMeans(n_clusters=2, tol=0.001)

    parameters = {'n_clusters': (2,10)}
    clf = GridSearchCV(km_clf, parameters, scoring='recall')
    test_classifier(clf, dataset, feature_list)
    print '###best_params'
    print clf.best_params_
    return clf.best_estimator_
开发者ID:MengoDB,项目名称:ud120-projects,代码行数:11,代码来源:poi_bkp.py

示例9: tuneDT

def tuneDT(feature_list,dataset):
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.grid_search import GridSearchCV
	from sklearn import tree
	tree_clf = tree.DecisionTreeClassifier()
	parameters = {'criterion':('gini', 'entropy'),
		'splitter':('best','random')}
	clf = GridSearchCV(tree_clf, parameters,scoring = 'recall')
	test_classifier(clf, my_dataset, features_list)
	print '###best_params'
	print clf.best_params_
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:11,代码来源:poi_id.py

示例10: detect_poi

def detect_poi():
    """Run the POI pipeline: load data, pick features, test models, dump.

    Steps, in order:
      1. Load ``final_project_dataset.pkl`` and drop the 'TOTAL' entry.
      2. Call the four ratio-feature helpers on the dataset (their return
         values are ignored, so they presumably add the features in place).
      3. Score every candidate feature list with ``ptest`` using GaussianNB
         and keep the list with the highest score.
      4. Print tester.py metrics for GaussianNB and for four decision-tree
         configurations on that feature list.
      5. Dump a fresh GaussianNB, the dataset and the chosen feature list.

    Takes no arguments and returns None.
    """
    ### Load the dictionary containing the dataset.
    # The ``with`` block closes the file handle, which the original leaked.
    # NOTE: pickle.load can execute arbitrary code; only load a trusted file.
    # NOTE(review): mode "r" is unchanged from the original -- confirm
    # whether "rb" is appropriate for this dataset file on the target platform.
    with open("final_project_dataset.pkl", "r") as dataset_file:
        data_dict = pickle.load(dataset_file)

    ### Task 1: Remove outliers
    # The default of 0 makes this a no-op when 'TOTAL' is absent.
    data_dict.pop('TOTAL', 0)

    ### Task 2: Select what features
    ### 'stk_pay_ratio','to_poi_ratio', 'from_poi_ratio','bonus_salary_ratio'
    ### features_list is a list of strings, each of which is a feature name.
    ### The first feature must be "poi".
    my_dataset = data_dict
    stk_pay_ratio(my_dataset)
    from_poi_ratio(my_dataset)
    to_poi_ratio(my_dataset)
    bonus_salary_ratio(my_dataset)

    ### Task 3: Feature Selection
    ### Generate a set of 15 feature lists from these 4 features.
    ### This way, all possible combinations of these features are tested.
    all_features_list = fList_set()

    ### Because of the small size of the dataset, the script uses stratified
    ### shuffle split cross validation in tester.py
    metrics = []
    clf = GaussianNB()
    ### ptest uses stratified shuffle split cross validation and calculates
    ### the precision.  Find the precision for every list.
    for i in range(0, 15):
        metrics.append(ptest(clf, my_dataset, all_features_list[i]))
    ### Go for the feature list that produces the best precision.
    ### For this dataset only, it is harder to get a high precision.
    best = np.array(metrics).argmax()

    ### Run test_classifier to print evaluation metrics to console
    test_classifier(clf, my_dataset, all_features_list[best])

    ### Now use the same feature list to run the decision tree classifier
    features_list = all_features_list[best]

    ### Task 4: Try a variety of classifiers
    samples_split_values = [2, 4]
    samples_leaf_values = [1, 2]

    for split in samples_split_values:
        for leaf in samples_leaf_values:
            clf = tree.DecisionTreeClassifier(min_samples_split=split,
                                              min_samples_leaf=leaf)
            test_classifier(clf, my_dataset, features_list)
            print_feature_importances(features_list, clf)

    ### Choose best classifier and feature set
    # A fresh GaussianNB is dumped, not one of the trees evaluated above.
    clf = GaussianNB()

    ### Dump classifier, dataset, and features_list
    dump_classifier_and_data(clf, my_dataset, features_list)
开发者ID:RaphaelTam,项目名称:Enron_Bad_Guys,代码行数:54,代码来源:poi.id.py

示例11: explore_scores

def explore_scores():
    """Run tester.py on a scaled KNN pipeline for every hyper-parameter combo.

    Iterates the Cartesian product of the module-level iterables
    ``features``, ``n_neighbor``, ``weights``, ``algorithm``, ``leaf_size``,
    ``p`` and ``metric`` (the last one varies fastest, matching the order of
    the original nested loops).  For each combination a pipeline made of the
    module-level ``scaler`` and a ``KNeighborsClassifier`` is passed to
    ``test_classifier`` with ``enron_data`` and the feature list returned by
    ``features_select``.  Returns None.
    """
    # Function-local import keeps this block self-contained.
    from itertools import product

    search_space = product(features, n_neighbor, weights, algorithm,
                           leaf_size, p, metric)
    for n, k, weighting, algo, leaf, power, dist_metric in search_space:
        # Called once per combination, exactly as in the original inner loop.
        feature = features_select(n)
        pipeline = Pipeline([
            ('normalization', scaler),
            ('classifier', KNeighborsClassifier(
                n_neighbors=k, weights=weighting, algorithm=algo,
                leaf_size=leaf, p=power, metric=dist_metric)),
        ])
        test_classifier(pipeline, enron_data, feature)
开发者ID:BlaneG,项目名称:Udacity_Intro_machine_learning,代码行数:14,代码来源:poi_id.py

示例12: tuneKNN

def tuneKNN(feature_list,dataset):
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler
	from sklearn.grid_search import GridSearchCV
	knn = KNeighborsClassifier()
	# feature scale
	estimators = [('scale', StandardScaler()), ('knn', knn)]
	pipeline = Pipeline(estimators)
	parameters = {'knn__n_neighbors':[1,8],
		'knn__algorithm':('ball_tree','kd_tree','brute','auto')}
	clf = GridSearchCV(pipeline, parameters,scoring = 'recall')
	test_classifier(clf, my_dataset, features_list)
	print '###best_params'
	print clf.best_params_
开发者ID:yielder,项目名称:identifying-fraud-from-enron-email,代码行数:15,代码来源:poi_id.py

示例13: getRF

def getRF():

    print "==============="
    print "RandomForests"
    print "==============="

    for score in scores:

        print score
        print

        #parameters = {'n_estimators':range(10, 150, 10), 'criterion':['gini', 'entropy'], 'min_samples_split':range(2, 8, 2)}
        parameters = {'rf__n_estimators':range(10, 150, 10), 'rf__criterion':['gini', 'entropy'], 'rf__min_samples_split':range(2, 8, 2), 
            'selector__k':range(3, 22, 1)}	

        gs = grid_search.GridSearchCV(rf_pipe, parameters, scoring=score, cv=cv)
            
        gs.fit(features, labels)

         #This is the model you pass to tester.py
        clf = gs.best_estimator_

        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "

        best_parameters = gs.best_estimator_.get_params()

        print " "
        print "Best Parameters- by Grid Search"
        print best_parameters
        print " "

        labels_pred = gs.predict(features)

        # Print Results  (will print the Grid Search score)
        print "Grid Search Classification report:" 
        print " "
        print classification_report(labels, labels_pred)
        print ' ' 

        # Print Results  (will print the tester.py score)
        print "tester.py Classification report:" 
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py

示例14: getKNN

def getKNN():

    print "==============="
    print "KNeighborsClassifier"
    print "==============="

    for score in scores:

        print score
        print

        #parameters = {'n_neighbors':range(2, 10, 2), 'weights':['distance', 'uniform'], 'metric':['minkowski', 'euclidean']}
        parameters = {'knn__n_neighbors': range(2, 10, 2), 'knn__weights':['distance', 'uniform'], 'knn__metric':['minkowski', 'euclidean'], 
            'selector__k':range(3, 20, 1)}

        gs = grid_search.GridSearchCV(knn_pipe, parameters, scoring=score, cv=cv)

        gs.fit(features, labels)

         #This is the model you pass to tester.py
        clf = gs.best_estimator_

        print " "
        print "Optimal Model - by Grid Search"
        print clf
        print " "

        best_parameters = gs.best_estimator_.get_params()

        print " "
        print "Best Parameters- by Grid Search"
        print best_parameters
        print " "

        labels_pred = gs.predict(features)

        # Print Results  (will print the Grid Search score)
        print "Grid Search Classification report:" 
        print " "
        print classification_report(labels, labels_pred)
        print ' ' 

        # Print Results  (will print the tester.py score)
        print "tester.py Classification report:" 
        print " "
        test_classifier(clf, my_dataset, features_list)
        print " "
        print
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py

示例15: getAda

def getAda():
		
	print "==============="
	print "AdaBoost"
	print "==============="

	for score in scores:

		print score
		print

		#parameters = {'n_estimators':range(50, 100, 1), 'learning_rate':[x * 0.01 for x in range(100, 160, 1)]}
		parameters = {'ada__n_estimators': range(1, 100, 20), 'ada__learning_rate':[x * 0.01 for x in range(100, 160, 10)],
			'selector__k':range(3, 22, 1)}

		gs = grid_search.GridSearchCV(ada_pipe, parameters, scoring=score, cv=cv)

		gs.fit(features, labels)

		 #This is the model you pass to tester.py
		clf = gs.best_estimator_

		print " "
		print "Optimal Model - by Grid Search"
		print clf
		print " "

		best_parameters = gs.best_estimator_.get_params()

		print " "
		print "Best Parameters- by Grid Search"
		print best_parameters
		print " "

		labels_pred = gs.predict(features)

		# Print Results  (will print the Grid Search score)
		print "Grid Search Classification report:" 
		print " "
		print classification_report(labels, labels_pred)
		print ' ' 

		# Print Results  (will print the tester.py score)
		print "tester.py Classification report:" 
		print " "
		test_classifier(clf, my_dataset, features_list)
		print " "
		print
开发者ID:abdelrahmansaud,项目名称:Identifying-Fraud-from-Enron-Email,代码行数:48,代码来源:poi_id.py


注:本文中的tester.test_classifier函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。