当前位置: 首页>>代码示例>>Python>>正文


Python DecisionTreeClassifier.fit方法代码示例

本文整理汇总了Python中sklearn.tree.DecisionTreeClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python DecisionTreeClassifier.fit方法的具体用法?Python DecisionTreeClassifier.fit怎么用?Python DecisionTreeClassifier.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.tree.DecisionTreeClassifier的用法示例。


在下文中一共展示了DecisionTreeClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
class Ensemble:
    """Plurality-vote ensemble over four scikit-learn classifiers."""

    def __init__(self, data):
        # Base models with their tuned hyper-parameters.
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)

        self.make_prediction(data)

    def make_prediction(self, data):
        '''
        Make an ensemble prediction
        '''
        # Train every base model on the same training split, in the same
        # order as before (matters for models drawing on the global RNG).
        for model in (self.rf, self.lda, self.dec, self.ada):
            model.fit(data.features_train, data.labels_train)

        # One row of votes per test sample; the row order (rf, lda, dec, ada)
        # fixes the tie-breaking below.
        votes = zip(self.rf.predict(data.features_test),
                    self.lda.predict(data.features_test),
                    self.dec.predict(data.features_test),
                    self.ada.predict(data.features_test))

        # max() returns the FIRST label with the highest count, which is
        # exactly what the stable descending sort + [0] used to pick.
        self.pred = [max(row, key=row.count) for row in votes]
开发者ID:BHouwens,项目名称:KaggleProjects,代码行数:36,代码来源:ensemble.py

示例2: sampling_overfitting

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def sampling_overfitting(rate=3):
    """Show how naive minority over-sampling (duplicating each minority
    point `rate` times) lets a decision tree overfit the minority class."""
    plt.figure(1)
    plt.clf()

    n_major, n_minor, jitter = 100, 20, 1
    samples = []

    # Majority class: uniform square [-1, 1]^2 plus Gaussian jitter.
    xs = np.random.rand(n_major) * 2 - 1 + np.random.randn(n_major) * jitter
    ys = np.random.rand(n_major) * 2 - 1 + np.random.randn(n_major) * jitter
    plt.plot(xs, ys, 'o', ms=10)
    for px, py in zip(xs, ys):
        samples.append([px, py, 0])

    # Minority class: shifted square; each point is appended `rate` times
    # to emulate crude over-sampling.
    xs = np.random.rand(n_minor) + 0.1 + np.random.randn(n_minor) * jitter
    ys = np.random.rand(n_minor) + 0.1 + np.random.randn(n_minor) * jitter
    plt.plot(xs, ys, '*', ms=10)
    for px, py in zip(xs, ys):
        for _ in xrange(rate):
            samples.append([px, py, 1])

    xlim, ylim = plt.xlim(), plt.ylim()

    # Fit an unconstrained entropy tree on the duplicated data.
    model = DecisionTreeClassifier(criterion='entropy')
    samples = np.array(samples)
    model.fit(samples[:, :-1], samples[:, -1])

    # Evaluate the tree on a dense grid and draw the decision regions.
    grid_x = np.linspace(xlim[0], xlim[1], 300)
    grid_y = np.linspace(ylim[0], ylim[1], 300)
    mesh_x, mesh_y = np.meshgrid(grid_x, grid_y)
    labels = model.predict(np.c_[mesh_x.ravel(), mesh_y.ravel()])
    plt.contourf(grid_x, grid_y, labels.reshape((len(grid_x), -1)), 1)
    plt.show()
开发者ID:tianfudhe,项目名称:ids,代码行数:36,代码来源:show_3problems.py

示例3: quize1

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def quize1(data):
    """Titanic feature-importance exercise.

    Keeps four predictors (Pclass, Fare, Age, Sex) plus the Survived
    target, drops rows with missing values, encodes Sex numerically,
    fits a DecisionTreeClassifier(random_state=241) and prints its
    feature importances (the two largest are the exercise's answer).
    """
    # .copy() breaks the view chain: assigning into a slice of a slice
    # triggers SettingWithCopyWarning and, under pandas copy-on-write,
    # silently fails to modify the data.
    dataF = data[['Pclass', 'Fare', 'Age', 'Sex', 'Survived']].copy()
    dataF = dataF.dropna()
    Y = dataF['Survived']
    dataF = dataF[['Pclass', 'Fare', 'Age', 'Sex']].copy()

    # Encode sex in one vectorized step: male -> 1, anything else -> 0.
    dataF['Sex'] = (dataF['Sex'] == 'male').astype(int)
    print(dataF)

    clf = DecisionTreeClassifier(random_state=241)
    clf.fit(dataF, Y)
    importances = clf.feature_importances_
    print(importances)
    return
开发者ID:BlinJin,项目名称:Machine-Learning,代码行数:28,代码来源:decision_trees.py

示例4: __init__

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
class Transformer:
    """Feature selector/reducer.

    Keeps only the features a decision tree assigns positive importance
    to, then (optionally) projects the survivors with PCA('mle').
    """

    def __init__(self, use_PCA=True):
        # Tree used purely for its feature_importances_ ranking.
        self._clf = DecisionTreeClassifier(min_samples_leaf=10)
        self._idx = None  # indices of features with importance > 0
        self._scaler = StandardScaler()  # kept for interface compatibility
        self._trans = PCA('mle')  # 'mle' lets PCA choose the dimensionality
        self._use_PCA = use_PCA

    def fit(self, X, y):
        """Fit the selector on (X, y) and return the transformed training set."""
        X = np.array(X)
        self._clf.fit(X, y)

        # list() is essential under Python 3, where filter() returns a
        # one-shot iterator: the original bare filter object would be
        # exhausted after the first indexing use. (range also replaces
        # the Python-2-only xrange; behavior is identical.)
        self._idx = list(filter(lambda i: self._clf.feature_importances_[i] > 0,
                                range(len(self._clf.feature_importances_))))

        new_set = [X[i][self._idx] for i in range(len(X))]

        if self._use_PCA:
            new_set = self._trans.fit_transform(new_set)
        return new_set

    def transform(self, features):
        """Apply the fitted feature selection (and PCA, if enabled) to one sample."""
        features = features[self._idx]
        if self._use_PCA:
            features = self._trans.transform(features)
        return features
开发者ID:ItsLastDay,项目名称:Opinion-mining-from-reviews,代码行数:31,代码来源:solution.py

示例5: decision_tree

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def decision_tree(train_bow, train_labels, test_bow, test_labels, bow_indexes):
    """Train a default decision tree on the bag-of-words data, then
    run the shared evaluation helper on the held-out set."""
    print("Training decision tree")
    classifier = DecisionTreeClassifier()
    classifier.fit(train_bow, train_labels)

    print("Testing decision tree")
    test(classifier, "dt", test_bow, test_labels, bow_indexes)
开发者ID:wangk1,项目名称:research,代码行数:9,代码来源:classifiers_func.py

示例6: evaluateDecisionTree

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def evaluateDecisionTree(train_x, train_y, test_x, test_y):
    """Fit a bounded entropy tree, plot its ROC curve and return the AUC."""
    model = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=5, max_depth=20)
    model.fit(train_x, train_y)

    # Predicted probability of the positive class (column 1).
    positive_scores = model.predict_proba(test_x)[:, 1]
    auc = roc_auc_score(test_y, positive_scores)
    plotAUC(test_y, model.predict_proba(test_x)[:, 1], 'DT')
    return auc
开发者ID:ds-ga-1001-final,项目名称:project,代码行数:9,代码来源:decision_tree.py

示例7: test_graphviz_errors

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def test_graphviz_errors():
    """export_graphviz must raise IndexError when feature_names is empty."""
    model = DecisionTreeClassifier(max_depth=3, min_samples_split=1)
    model.fit(X, y)  # X, y are the module-level test fixtures

    sink = StringIO()
    assert_raises(IndexError, export_graphviz, model, sink, feature_names=[])
开发者ID:Hydroinformatics-UNESCO-IHE,项目名称:scikit-learn,代码行数:9,代码来源:test_export.py

示例8: buildTree

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def buildTree(options, treefile, dataFile = None):
    """Return a cached decision tree, or train one from CSV data and cache it.

    `dataFile` may be a single CSV file or a directory of CSVs; raises
    ValueError when no cached tree exists and no data file is given.
    """
    dt = loadTree(treefile)
    if dt is not None:
        return dt
    if dataFile is None:
        raise ValueError("No data file specified")

    dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)

    # Collect every CSV to train on.
    if os.path.isdir(dataFile):
        files = getFiles(dataFile, ".csv")
    else:
        files = [dataFile]

    featureFrames, targetFrames = [], []
    for _file in files:
        print("Loading data %s" % _file)
        # `features` from the last file loaded labels the exported graph.
        (featureValues, targetValues, features, df) = loadData(_file, options)
        featureFrames.append(featureValues)
        targetFrames.append(targetValues)

    dt.fit(pd.concat(featureFrames), pd.concat(targetFrames))
    saveTree(treefile, dt)
    print("Building graph")
    visualize_tree(treefile, dt, features)
    return dt
开发者ID:gbrian,项目名称:naive-machine-learning,代码行数:27,代码来源:searchml.py

示例9: decision_trees

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def decision_trees(features, labels):
    """10-fold cross-validate an entropy decision tree and print the
    mean precision/recall/F-score table."""
    model = DecisionTreeClassifier(random_state=0, criterion="entropy")
    model.fit(features, labels)

    fold_scores = cross_validation.cross_val_score(
        model, features, labels, cv=10, score_func=metrics.precision_recall_fscore_support
    )
    # Average over the folds, rounded to two decimals for display.
    print_table("Decision Trees", numpy.around(numpy.mean(fold_scores, axis=0), 2))
开发者ID:pelluch,项目名称:data-mining,代码行数:9,代码来源:main.py

示例10: text_learning_experiment

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def text_learning_experiment(words_to_remove=[]):
    """Author-identification experiment on the Sara/Chris email corpus.

    Vectorizes up to 300 emails with TF-IDF (after stripping
    `words_to_remove`), trains a decision tree on 150 samples, prints
    train/test accuracy and reports the single most important feature.

    Returns (feature_name, feature_importance) for that feature.

    NOTE(review): Python 2 only (print statements); also relies on a
    mutable default argument -- safe only while callers and
    vectorize_emails treat words_to_remove as read-only.
    """
    from_sara  = open("../text_learning/from_sara.txt", "r")
    from_chris = open("../text_learning/from_chris.txt", "r")
    word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300, words_to_remove=words_to_remove)
    features_train, features_test, labels_train, labels_test = \
        cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5,
                                 stop_words='english')
    # Fit TF-IDF on the training split only; test is transformed with the
    # same vocabulary.
    features_train = vectorizer.fit_transform(features_train)
    features_test  = vectorizer.transform(features_test).toarray()

    # Deliberately small training set (150 samples) to provoke overfitting.
    features_train = features_train[:150].toarray()
    labels_train   = labels_train[:150]

    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    predict_train = clf.predict(features_train)
    predict_test = clf.predict(features_test)
    print "train acc:", accuracy_score(labels_train, predict_train)
    print "test acc: ", accuracy_score(labels_test, predict_test)
    # The dominant feature often reveals a leaked signature word.
    feature_index = np.argmax(clf.feature_importances_)
    feature_importance = clf.feature_importances_[feature_index]
    feature_name = vectorizer.get_feature_names()[feature_index]
    print "Most important feature, and relative importance:", feature_name, ":", feature_importance
    return feature_name, feature_importance
开发者ID:andrei-iusan,项目名称:ud120-projects,代码行数:27,代码来源:poi_id.py

示例11: train_dtc

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def train_dtc(X, y):
    """Fit a default-parameter decision tree on (X, y) and return it."""
    classifier = DecisionTreeClassifier()
    classifier.fit(X, y)
    return classifier
开发者ID:texaspandaa,项目名称:Text-Mining,代码行数:9,代码来源:1.py

示例12: decision_tree_entropy

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def decision_tree_entropy(training_data):
    clf = DecisionTreeClassifier(criterion="entropy",random_state=0)
    clf.fit(training_data[0], training_data[1])
    #with open("/media/deeksha/e/Deeksha/Dropbox/Coursework/MachineLearning/HW3/entropy.dot", 'w') as f:
    #    f = tree.export_graphviz(clf, out_file=f)
    print "entropy:Number of Nodes", clf.tree_.node_count
    return clf
开发者ID:deekshachugh,项目名称:MachineLearning,代码行数:9,代码来源:DecisionTreeUsingGiniand+Entropy.py

示例13: tree

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def tree(labels, X, df, i):
  """Fit a depth-4 decision tree on (X, labels), export it as a Graphviz
  .dot file named "<i>_tree.dot" under OUTPUT_DIRECTORY, and return the
  tree's feature importances.
  """
  # Renamed from `tree`: the original local shadowed the function itself.
  # The unused `para = tree.get_params()` call was dead code and is gone.
  model = DT(max_depth=4)
  model.fit(X, labels)
  export_graphviz(model,
                  out_file=OUTPUT_DIRECTORY + str(i) + "_tree.dot",
                  feature_names=df.columns)
  return model.feature_importances_
开发者ID:alicetang0618,项目名称:big_data,代码行数:9,代码来源:cluster.py

示例14: get_most_important_features

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
def get_most_important_features(dataset, features_list):
    """Calculates the feature importances.

    Builds a deliberately overfit decision tree on a 70/30 split of the
    formatted dataset and returns its feature_importances_ array.
    """
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import accuracy_score

    data = featureFormat(dataset, features_list, sort_keys = True)
    labels, features = targetFeatureSplit(data)

    # Fixed seed keeps the split (and hence the importances) reproducible.
    features_train, features_test, labels_train, labels_test = \
        train_test_split(features, labels, test_size=0.3, random_state=42)

    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)

    # Accuracy of the overfit tree -- computed for optional debugging only.
    acc = accuracy_score(labels_test, clf.predict(features_test))

    return clf.feature_importances_
开发者ID:dumkydewilde,项目名称:ud-ps5-enron,代码行数:36,代码来源:example_project.py

示例15: fit

# 需要导入模块: from sklearn.tree import DecisionTreeClassifier [as 别名]
# 或者: from sklearn.tree.DecisionTreeClassifier import fit [as 别名]
 def fit(self, X, y):
     """Train a discrete-AdaBoost ensemble of depth-2 decision trees.

     Runs up to self.T = 50 boosting rounds. Each round fits a weak tree
     with the current sample weights, up-weights misclassified samples by
     exp(alpha), and records (alpha, tree). If a round's weighted error is
     0 or >= 0.5, boosting stops early and self.T is truncated to the
     number of completed rounds.

     Side effects: sets self.T, self.weakClassifierEnsemble, self.alphas,
     self.nrOfClasses.
     """
     N = len(X)
     # Uniform initial sample weights summing to 1.
     w = (1.0/N)*np.ones(N) #todo: weights global??
     self.T = 50
     self.weakClassifierEnsemble = []
     self.alphas = []
     self.nrOfClasses = 3
     for t in range(self.T):
         weakDecisionTree = DecisionTreeClassifier(random_state=0, max_depth=2) #max_depth=1 might be better in general
         #weakDecisionTree = DecisionTreeClassifier(random_state=0) #working, but very bad results (p < 0.5)
         weakDecisionTree.fit(X, y, sample_weight=w)
         predictions = weakDecisionTree.predict(X)
         # Weighted training error: total weight of misclassified samples.
         e = np.sum(w[np.logical_not(predictions == y)])
         #if e == 0 or e >= (1 - (1.0/self.nrOfClasses)): #SAMME
         if e==0 or e >= 0.5: #if e==0: classifier not weak enough
             #finish model generation
             # Early abort: e == 0 means the learner is no longer "weak";
             # e >= 0.5 makes alpha non-positive, so boosting cannot help.
             self.T = t
             print("aborting model generation early!!")
             return
         # Classifier weight; larger when the weighted error is smaller.
         alpha = math.log((1.0-e)/e)
         #alpha = math.log((1.0-e)/e) + math.log(self.nrOfClasses - 1) #SAMME
         # Up-weight only the misclassified samples.
         for i in range(N):
             if predictions[i] != y[i]:
                 w[i] *= math.exp(alpha)
     
         #normalize the weights
         w /= np.sum(w)
         
         self.alphas.append(alpha)
         self.weakClassifierEnsemble.append(weakDecisionTree)
开发者ID:HaythemSahbani,项目名称:Machine-learnig,代码行数:32,代码来源:AdaBoostCustom.py


注:本文中的sklearn.tree.DecisionTreeClassifier.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。