This article collects typical usage examples of the DecisionTreeClassifier.fit method from Python's sklearn.tree module. If you have been wondering what DecisionTreeClassifier.fit does, how to call it, or what it looks like in real code, the curated examples below should help. You can also read more about the containing class, sklearn.tree.DecisionTreeClassifier.
Fifteen code examples of DecisionTreeClassifier.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python examples.
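Before the individual examples, here is a minimal sketch of the basic call pattern, using the bundled iris dataset (this sketch is not one of the collected examples):

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X, y)              # fit(X, y) trains the tree in place and returns the estimator
print(clf.predict(X[:5]))  # class predictions for the first five samples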
Example 1: __init__
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

class Ensemble:
    def __init__(self, data):
        self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
        self.lda = LDA()
        self.dec = DecisionTreeClassifier(criterion='entropy')
        self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)
        self.make_prediction(data)

    def make_prediction(self, data):
        '''
        Make an ensemble prediction by majority vote over the four classifiers.
        '''
        self.rf.fit(data.features_train, data.labels_train)
        self.lda.fit(data.features_train, data.labels_train)
        self.dec.fit(data.features_train, data.labels_train)
        self.ada.fit(data.features_train, data.labels_train)
        pre_pred = []
        self.pred = []
        ada_pred = self.ada.predict(data.features_test)
        rf_pred = self.rf.predict(data.features_test)
        lda_pred = self.lda.predict(data.features_test)
        dec_pred = self.dec.predict(data.features_test)
        for i in range(len(rf_pred)):
            pre_pred.append([rf_pred[i], lda_pred[i], dec_pred[i], ada_pred[i]])
        for entry in pre_pred:
            # sort the four votes by frequency; the first element is the majority vote
            pred_list = sorted(entry, key=entry.count, reverse=True)
            self.pred.append(pred_list[0])
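A hedged usage sketch for the class above; the Data holder is an assumption (any object exposing features_train, labels_train and features_test attributes works):

# Hypothetical driver for Ensemble; X_train, y_train and X_test are prepared elsewhere.
from collections import namedtuple

Data = namedtuple('Data', ['features_train', 'labels_train', 'features_test'])
ensemble = Ensemble(Data(X_train, y_train, X_test))
print(ensemble.pred[:10])  # majority-vote predictions for the first ten test rows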
Example 2: sampling_overfitting
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier

def sampling_overfitting(rate=3):
    """Illustrate how naively duplicating minority-class samples can overfit a tree."""
    data = []
    plt.figure(1)
    plt.clf()
    majN = 100   # number of majority-class points
    minoN = 20   # number of minority-class points
    jt = 1       # jitter scale
    x = np.random.rand(majN) * 2 - 1 + np.random.randn(majN) * jt
    y = np.random.rand(majN) * 2 - 1 + np.random.randn(majN) * jt
    plt.plot(x, y, 'o', ms=10)
    for i in range(majN):
        data.append([x[i], y[i], 0])
    x = np.random.rand(minoN) + 0.1 + np.random.randn(minoN) * jt
    y = np.random.rand(minoN) + 0.1 + np.random.randn(minoN) * jt
    plt.plot(x, y, '*', ms=10)
    for i in range(minoN):
        for j in range(rate):  # duplicate each minority point `rate` times
            data.append([x[i], y[i], 1])
    xlim, ylim = plt.xlim(), plt.ylim()
    mdl = DecisionTreeClassifier(criterion='entropy')
    data = np.array(data)
    mdl.fit(data[:, :-1], data[:, -1])
    # evaluate the fitted tree on a 300x300 grid to draw its decision regions
    x = np.linspace(xlim[0], xlim[1], 300)
    y = np.linspace(ylim[0], ylim[1], 300)
    X, Y = np.meshgrid(x, y)
    grid_points = np.c_[X.ravel(), Y.ravel()]
    Z = mdl.predict(grid_points)
    Z = Z.reshape(X.shape)
    plt.contourf(x, y, Z, 1)
    plt.show()
Example 3: quize1
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def quize1(data):
    # 1. Load the sample from titanic.csv with the pandas package.
    # 2. Keep four features: passenger class (Pclass), ticket price (Fare),
    #    passenger age (Age) and sex (Sex).
    # 3. Note that the Sex feature holds string values.
    # 4. Extract the target variable; it is stored in the Survived column.
    # 5. The data contains missing values; for example, the age of some passengers is unknown.
    # 6. Such records are read into pandas as NaN.
    #    Find all objects with missing features and drop them from the sample.
    # Train a decision tree with random_state=241 and all other parameters at their defaults.
    # Compute the feature importances and find the two features with the highest importance.
    # Their names are the answer to this task (give the names separated by a comma
    # or a space; the order does not matter).
    dataF = data[['Pclass', 'Fare', 'Age', 'Sex', 'Survived']]
    dataF = dataF.dropna()
    Y = dataF['Survived']
    dataF = dataF[['Pclass', 'Fare', 'Age', 'Sex']]
    clf = DecisionTreeClassifier(random_state=241)
    # encode Sex numerically: male -> 1, everything else -> 0
    dataF.loc[dataF['Sex'] != 'male', 'Sex'] = 0
    dataF.loc[dataF['Sex'] == 'male', 'Sex'] = 1
    print(dataF)
    clf.fit(dataF, Y)
    importances = clf.feature_importances_
    print(importances)
    # d = dict(zip(dataF.columns, clf.feature_importances_))
    # print(d)
    return
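To answer the quiz, the importances still have to be matched to column names; a small sketch, assuming clf and dataF from the function above:

# Rank (column, importance) pairs and keep the two most important features.
ranked = sorted(zip(dataF.columns, clf.feature_importances_),
                key=lambda pair: pair[1], reverse=True)
print(ranked[:2])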
Example 4: __init__
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
class Transformer:
    def __init__(self, use_PCA=True):
        self._clf = DecisionTreeClassifier(min_samples_leaf=10)
        self._idx = None
        self._scaler = StandardScaler()
        self._trans = PCA(n_components='mle')
        self._use_PCA = use_PCA

    def fit(self, X, y):
        X = np.array(X)
        self._clf.fit(X, y)
        # keep only the columns the tree considers informative (importance > 0)
        self._idx = [i for i, imp in enumerate(self._clf.feature_importances_) if imp > 0]
        new_set = [X[i][self._idx] for i in range(len(X))]
        # new_set = self._scaler.fit_transform(new_set)
        if self._use_PCA:
            new_set = self._trans.fit_transform(new_set)
        return new_set

    def transform(self, features):
        features = features[self._idx]
        # features = self._scaler.transform(features.astype(float))
        if self._use_PCA:
            # PCA.transform expects a 2-D array, so reshape the single sample
            features = self._trans.transform(features.reshape(1, -1))[0]
        return features
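A minimal usage sketch under assumed inputs (X_train, y_train and x_new are placeholders; StandardScaler and PCA come from sklearn.preprocessing and sklearn.decomposition):

# Hypothetical driver: fit() selects the informative columns and returns the reduced
# training set; transform() then projects a single new sample the same way.
trans = Transformer(use_PCA=False)
X_train_reduced = trans.fit(np.array(X_train), y_train)
x_new_reduced = trans.transform(np.array(x_new))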
Example 5: decision_tree
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def decision_tree(train_bow, train_labels, test_bow, test_labels, bow_indexes):
    print("Training decision tree")
    dt_classifier = DecisionTreeClassifier()
    dt_classifier.fit(train_bow, train_labels)
    print("Testing decision tree")
    test(dt_classifier, "dt", test_bow, test_labels, bow_indexes)
Example 6: evaluateDecisionTree
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def evaluateDecisionTree(train_x, train_y, test_x, test_y):
    clf = DecisionTreeClassifier(criterion='entropy', min_samples_leaf=5, max_depth=20)
    clf.fit(train_x, train_y)
    p = clf.predict_proba(test_x)[:, 1]  # predicted probability of the positive class
    auc = roc_auc_score(test_y, p)
    plotAUC(test_y, p, 'DT')
    return auc
Example 7: test_graphviz_errors
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def test_graphviz_errors():
    """Check the errors raised by export_graphviz."""
    clf = DecisionTreeClassifier(max_depth=3, min_samples_split=2)
    clf.fit(X, y)
    out = StringIO()
    # an empty feature_names list cannot cover the fitted features
    assert_raises(IndexError, export_graphviz, clf, out, feature_names=[])
Example 8: buildTree
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def buildTree(options, treefile, dataFile=None):
    dt = loadTree(treefile)
    if dt is not None:
        return dt
    if dataFile is None:
        raise ValueError("No data file specified")
    dt = DecisionTreeClassifier(min_samples_split=20, random_state=99)
    files = []
    featureFrames = []
    targetFrames = []
    if os.path.isdir(dataFile):
        files = getFiles(dataFile, ".csv")
    else:
        files.append(dataFile)
    for _file in files:
        print("Loading data %s" % _file)
        (featureValues, targetValues, features, df) = loadData(_file, options)
        featureFrames.append(featureValues)
        targetFrames.append(targetValues)
    # fit on the concatenation of all loaded files
    dt.fit(pd.concat(featureFrames), pd.concat(targetFrames))
    saveTree(treefile, dt)
    print("Building graph")
    visualize_tree(treefile, dt, features)
    return dt
Example 9: decision_trees
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def decision_trees(features, labels):
    classifier = DecisionTreeClassifier(random_state=0, criterion="entropy")
    classifier.fit(features, labels)
    # note: this uses the legacy sklearn.cross_validation API and its score_func argument
    scores = cross_validation.cross_val_score(
        classifier, features, labels, cv=10, score_func=metrics.precision_recall_fscore_support
    )
    print_table("Decision Trees", numpy.around(numpy.mean(scores, axis=0), 2))
Example 10: text_learning_experiment
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def text_learning_experiment(words_to_remove=[]):
    from_sara = open("../text_learning/from_sara.txt", "r")
    from_chris = open("../text_learning/from_chris.txt", "r")
    word_data, authors = vectorize_emails(from_sara, from_chris, max_emails=300,
                                          words_to_remove=words_to_remove)
    features_train, features_test, labels_train, labels_test = \
        cross_validation.train_test_split(word_data, authors, test_size=0.1, random_state=42)
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
    features_train = vectorizer.fit_transform(features_train)
    features_test = vectorizer.transform(features_test).toarray()
    # keep a small training set so the tree overfits and exposes signature words
    features_train = features_train[:150].toarray()
    labels_train = labels_train[:150]
    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    predict_train = clf.predict(features_train)
    predict_test = clf.predict(features_test)
    print("train acc:", accuracy_score(labels_train, predict_train))
    print("test acc: ", accuracy_score(labels_test, predict_test))
    feature_index = np.argmax(clf.feature_importances_)
    feature_importance = clf.feature_importances_[feature_index]
    feature_name = vectorizer.get_feature_names()[feature_index]
    print("Most important feature, and relative importance:", feature_name, ":", feature_importance)
    return feature_name, feature_importance
Example 11: train_dtc
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def train_dtc(X, y):
    """
    Create and train the Decision Tree Classifier.
    """
    dtc = DecisionTreeClassifier()
    dtc.fit(X, y)
    return dtc
Example 12: decision_tree_entropy
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def decision_tree_entropy(training_data):
    clf = DecisionTreeClassifier(criterion="entropy", random_state=0)
    clf.fit(training_data[0], training_data[1])
    # with open("/media/deeksha/e/Deeksha/Dropbox/Coursework/MachineLearning/HW3/entropy.dot", 'w') as f:
    #     f = tree.export_graphviz(clf, out_file=f)
    print("entropy: number of nodes", clf.tree_.node_count)
    return clf
Example 13: tree
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def tree(labels, X, df, i):
    tree = DT(max_depth=4)  # DT is DecisionTreeClassifier imported under an alias
    tree.fit(X, labels)
    impt = tree.feature_importances_
    para = tree.get_params()
    export_graphviz(tree, out_file=OUTPUT_DIRECTORY + str(i) + "_tree.dot",
                    feature_names=df.columns)
    return impt
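The .dot file written by export_graphviz can be rendered with the Graphviz command-line tool, for example dot -Tpng 1_tree.dot -o 1_tree.png (file name assumed for i=1).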
Example 14: get_most_important_features
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def get_most_important_features(dataset, features_list):
    """Calculates the feature importances.

    Takes a dataset and a list of features as input, fits a deliberately
    overfit decision tree, and returns the list of feature importances.
    """
    # creating an overfitted decision tree
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import accuracy_score
    data = featureFormat(dataset, features_list, sort_keys=True)
    labels, features = targetFeatureSplit(data)
    # new features filtered, NaN values removed
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.3, random_state=42)
    clf = DecisionTreeClassifier()
    clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)
    acc = accuracy_score(labels_test, pred)
    # uncomment to print the accuracy score
    # print("overfitted accuracy", acc)
    # calculating feature importances
    feat_imp = clf.feature_importances_
    # uncomment to print the most important (common) ones
    # print(feat_imp)
    # for index, feature in enumerate(feat_imp):
    #     if feature > 0.2:
    #         print("spot:", index, ":", features_list[index + 1], "| value:", feature)
    return feat_imp
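A hedged calling-side sketch (my_dataset is a placeholder; features_list[0] is the target, so importances align with features_list[1:]):

feat_imp = get_most_important_features(my_dataset, features_list)
top = sorted(zip(features_list[1:], feat_imp), key=lambda pair: pair[1], reverse=True)
print(top[:3])  # the three most important features by name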
Example 15: fit
# Required import: from sklearn.tree import DecisionTreeClassifier [as alias]
# Or: from sklearn.tree.DecisionTreeClassifier import fit [as alias]
def fit(self, X, y):
    N = len(X)
    w = (1.0 / N) * np.ones(N)  # uniform initial sample weights; todo: make weights global?
    self.T = 50                 # number of boosting rounds
    self.weakClassifierEnsemble = []
    self.alphas = []
    self.nrOfClasses = 3
    for t in range(self.T):
        # a shallow tree as the weak learner; max_depth=1 might be better in general
        weakDecisionTree = DecisionTreeClassifier(random_state=0, max_depth=2)
        # weakDecisionTree = DecisionTreeClassifier(random_state=0)  # works, but very poor results (p < 0.5)
        weakDecisionTree.fit(X, y, sample_weight=w)
        predictions = weakDecisionTree.predict(X)
        # weighted training error of this round's weak learner
        e = np.sum(w[np.logical_not(predictions == y)])
        # if e == 0 or e >= (1 - (1.0 / self.nrOfClasses)):  # SAMME threshold
        if e == 0 or e >= 0.5:  # e == 0 means the classifier is not weak enough
            # finish model generation early
            self.T = t
            print("aborting model generation early!!")
            return
        alpha = math.log((1.0 - e) / e)
        # alpha = math.log((1.0 - e) / e) + math.log(self.nrOfClasses - 1)  # SAMME
        for i in range(N):
            if predictions[i] != y[i]:
                w[i] *= math.exp(alpha)  # increase the weight of misclassified samples
        # normalize the weights
        w /= np.sum(w)
        self.alphas.append(alpha)
        self.weakClassifierEnsemble.append(weakDecisionTree)
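The snippet ends with fit, so prediction is not shown; a hedged companion sketch under the same conventions (weighted vote over the weak trees, assuming integer class labels 0..nrOfClasses-1) might look like this:

def predict(self, X):
    # Hypothetical companion to fit(): each weak tree votes with weight alpha.
    votes = np.zeros((len(X), self.nrOfClasses))
    for alpha, tree in zip(self.alphas, self.weakClassifierEnsemble):
        pred = tree.predict(X).astype(int)       # assumes labels are 0..nrOfClasses-1
        votes[np.arange(len(X)), pred] += alpha  # accumulate the weighted votes
    return np.argmax(votes, axis=1)              # highest-weighted class wins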