本文整理汇总了Python中sklearn.multiclass.OutputCodeClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python OutputCodeClassifier.fit方法的具体用法?Python OutputCodeClassifier.fit怎么用?Python OutputCodeClassifier.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.multiclass.OutputCodeClassifier
的用法示例。
在下文中一共展示了OutputCodeClassifier.fit方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_ecoc_fit_predict
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
def test_ecoc_fit_predict():
    """Fit/predict an ECOC classifier around two base-estimator families.

    Covers one estimator exposing decision_function (LinearSVC) and one
    exposing predict_proba (MultinomialNB); in both cases the number of
    fitted binary estimators must equal n_classes * code_size.
    """
    for base_estimator in (LinearSVC(), MultinomialNB()):
        ecoc = OutputCodeClassifier(base_estimator, code_size=2)
        ecoc.fit(iris.data, iris.target).predict(iris.data)
        assert_equal(len(ecoc.estimators_), n_classes * 2)
示例2: train_svm
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
def train_svm(labels,array, num_folds, num_jobs, params = 2):
    """Search SVC hyperparameters and return a classifier fitted with the best ones.

    Parameters
    ----------
    labels : sequence
        Class label per row of `array`.
    array : array-like
        Feature matrix.
    num_folds : int
        Cross-validation folds for the randomized search.
    num_jobs : int
        Parallel jobs for the randomized search.
    params : int, default 2
        Number of parameter settings sampled (n_iter of RandomizedSearchCV).

    Returns
    -------
    A *fitted* classifier: an OutputCodeClassifier wrapping an SVC in the
    multiclass branch, a bare SVC otherwise.

    Bug fix vs. the original: the multiclass branch fitted the
    OutputCodeClassifier wrapper but returned the unfitted inner SVC, and
    the binary branch had its fit() call commented out — both branches now
    return a model fitted on the full data.
    """
    # NOTE(review): `len(labels) > 2` counts *samples*, not distinct classes —
    # presumably `len(set(labels)) > 2` was intended; kept unchanged so the
    # search branch and the final-training branch below stay in agreement.
    if len(labels) > 2:
        print("outputcodeclassifier")
        # Reduced grid (the author's fuller grid with C up to 1000, poly
        # kernels and degrees 1-4 was left commented out in the original).
        param_grid = {'estimator__C': [0.001, 0.005],
                      'estimator__kernel': ['linear','rbf'],
                      'estimator__gamma': [0.0005,0.001],
                      'estimator__degree': [1]}
        model = OutputCodeClassifier(svm.SVC(probability=True))
    else:
        print("svc model")
        param_grid = {'C': [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000],
                      'kernel': ['linear','rbf','poly'],
                      'gamma': [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048],
                      'degree': [1,2,3,4]}
        model = svm.SVC(probability=True)
    paramsearch = RandomizedSearchCV(model, param_grid, cv=num_folds, verbose=2,
                                     n_iter=params, n_jobs=num_jobs)
    print("Grid search...")
    paramsearch.fit(array, numpy.asarray(labels))
    print("Prediction...")
    # Report the winning parameter settings and score.
    parameters = paramsearch.best_params_
    for parameter in parameters.keys():
        print(parameter + ": " + str(parameters[parameter]) + "\n")
    print("best score: " + str(paramsearch.best_score_) + "\n\n")
    # Retrain on the full data using the best parameters found.
    if len(labels) > 2:
        best_svc = svm.SVC(probability=True, C=parameters['estimator__C'],
                           kernel=parameters['estimator__kernel'],
                           gamma=parameters['estimator__gamma'],
                           degree=parameters['estimator__degree'])
        classifier = OutputCodeClassifier(best_svc, n_jobs=1)
    else:
        classifier = svm.SVC(probability=True, C=parameters['C'],
                             kernel=parameters['kernel'], gamma=parameters['gamma'],
                             degree=parameters['degree'])
    classifier.fit(array, labels)
    return classifier
示例3: get_haar_features
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
# NOTE(review): extraction stripped this snippet's indentation and its opening
# loop header — the lines below are the tail of a loop filling
# `train_ecoc_table` with Haar features of each training image, followed by
# the matching test-set loop. Written for Python 2 (print statements).
row = []
for (top_left, bottom_right) in rectangles:
row += get_haar_features(im, top_left, bottom_right)
train_ecoc_table[ind] = row
# One 200-wide feature row per test image, using the same rectangle set.
test_ecoc_table = np.zeros(shape=(np.shape(test_images)[0], 200))
for ind, im in enumerate(test_images):
row = []
for (top_left, bottom_right) in rectangles:
row += get_haar_features(im, top_left, bottom_right)
test_ecoc_table[ind] = row
# ECOC over AdaBoosted decision stumps (max_depth=1), 5x code words per class.
clf = OutputCodeClassifier(AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=200), code_size=5, random_state=0)
clf.fit(train_ecoc_table, labels)
# Accuracy = fraction of predictions matching the label arrays.
train_pred = np.array(clf.predict(train_ecoc_table))
print "Digits Training Accuracy: %f" % (np.sum(train_pred == np.array(labels)).astype(np.float)/np.shape(train_pred)[0])
test_pred = np.array(clf.predict(test_ecoc_table))
print "Digits Testing Accuracy: %f" % (np.sum(test_pred == np.array(test_labels)).astype(np.float)/np.shape(test_pred)[0])
# Commented-out earlier attempt: integral-image preprocessing for Haar features.
# ecoc_table = []
# for im in images:
#
# im_preprocess = np.matrix([[np.sum(im[:i,:j]) for i in range(1, 29)] for j in range(1, 29)])
#
# def get_black_rectangle(top_left, bottom_right):
# x1, y1 = top_left
# x2, y2 = bottom_right
示例4: LabelEncoder
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
# NOTE(review): extraction stripped indentation and the opening of the
# training-ingredients loop; the first line below is that loop's tail.
train_ingredients.append(' '.join(ings))
#construct test_ingredients
# Lemmatize each ingredient word after stripping non-letter characters,
# then join an entry's ingredients into one space-separated document.
for entry in test_set:
ings = [WordNetLemmatizer().lemmatize(re.sub('[^A-Za-z]', ' ', w)) for w in entry['ingredients']]
test_ingredients.append(' '.join(ings))
#used to encode labels as numbers for use with RandomForestClassifier
le = LabelEncoder()
#encode cuisines as numbers
train_cuisines = le.fit_transform(train_cuisines)
#used to create bag of ingredients vocabulary and create features for each entry
vectorizer = CountVectorizer()
train_features = vectorizer.fit_transform(train_ingredients).toarray()
test_features = vectorizer.transform(test_ingredients).toarray()
# ECOC over LinearSVC; fixed random_state values make the run reproducible.
clf = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=2)
result = clf.fit(train_features, train_cuisines).predict(test_features)
# Map numeric predictions back to cuisine names for the submission file.
output = pd.DataFrame(data={'id':test_ids, 'cuisine':le.inverse_transform(result)})
#force explicit ordering of columns
output = output[['id', 'cuisine']]
output.to_csv('ecoc.csv', index=False)
示例5: Classifier
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
#.........这里部分代码省略.........
def predict(self, ts):
    """Classify each test instance.

    Returns one row per instance: [identifier, "<gold> <predicted>" label
    pair, space-joined per-class probabilities rounded to 2 decimals].
    The identifier is meta field 5 when the first instance carries a
    6-element "meta" list, otherwise the instance's ngrams with any
    underscore-bearing token filtered out.
    """
    vectors = self.vectorize(ts)
    rows = []
    # The first instance's meta length decides the id column for all rows.
    use_meta_id = len(ts[0]["meta"]) == 6
    for index, vector in enumerate(vectors):
        predicted = self.clf.predict(vector)
        probabilities = self.clf.predict_proba(vector)
        predicted_label = self.labeldict_back[predicted[0]]
        scores = " ".join([str(round(p, 2)) for p in probabilities.tolist()[0]])
        if use_meta_id:
            identifier = ts[index]["meta"][5]
        else:
            identifier = " ".join([tok for tok in ts[index]["ngrams"] if not re.search("_", tok)])
        rows.append([identifier, ts[index]["label"] + " " + predicted_label, scores])
    return rows
# NOTE(review): extraction stripped the class-body indentation of this method
# (of the "Classifier" class per the page header). Structure mirrors the
# module-level train_svm in example 2, but here both branches correctly fit
# self.clf. Code is left byte-identical; only comments were added.
def train_svm(self,params = 10):
#obtain the best parameter settings for an svm outputcode classifier
# NOTE(review): `len(self.labels) > 2` appears to test how many labels exist;
# confirm self.labels holds the distinct classes rather than one label per
# sample (cf. the sample-count ambiguity in example 2).
if len(self.labels) > 2:
print("outputcodeclassifier")
param_grid = {'estimator__C': [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000],
'estimator__kernel': ['linear','rbf','poly'],
'estimator__gamma': [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048],
'estimator__degree': [1,2,3,4]}
model = OutputCodeClassifier(svm.SVC(probability=True))
else:
print("svc model")
param_grid = {'C': [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000],
'kernel': ['linear','rbf','poly'],
'gamma': [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048],
'degree': [1,2,3,4]}
model = svm.SVC(probability=True)
# Randomized search: `params` settings sampled, 5-fold CV, self.jobs workers.
paramsearch = RandomizedSearchCV(model, param_grid, cv=5, verbose=2,n_iter = params,n_jobs=self.jobs)
print("Grid search...")
paramsearch.fit(self.training_csr,numpy.asarray(self.trainlabels))
print("Prediction...")
#print the best parameters to the file
parameters = paramsearch.best_params_
self.outstring = "best parameter settings:\n"
for parameter in parameters.keys():
self.outstring += (parameter + ": " + str(parameters[parameter]) + "\n")
self.outstring += ("best score: " + str(paramsearch.best_score_) + "\n\n")
#train an svm outputcode classifier using the best parameters
if len(self.labels) > 2:
clf = svm.SVC(probability=True, C=parameters['estimator__C'],
kernel=parameters['estimator__kernel'],gamma=parameters['estimator__gamma'],
degree=parameters['estimator__degree'])
self.clf = OutputCodeClassifier(clf,n_jobs=self.jobs)
self.clf.fit(self.training_csr,self.trainlabels)
else:
self.clf = svm.SVC(probability=True, C=parameters['C'],
kernel=parameters['kernel'],gamma=parameters['gamma'],
degree=parameters['degree'])
self.clf.fit(self.training_csr,self.trainlabels)
def train_nb(self):
    """Fit a multinomial Naive Bayes model on the stored training matrix.

    Stores the fitted estimator on self.clf, like the other train_* methods.
    """
    self.clf = naive_bayes.MultinomialNB()
    self.clf.fit(self.training_csr, self.trainlabels)
def train_decisiontree(self):
    """Fit a decision tree on the stored training data.

    The sparse training matrix is densified first — this tree is fitted on a
    dense array, unlike the other train_* methods which pass the CSR matrix.
    """
    dense_training = self.training_csr.toarray()
    self.clf = tree.DecisionTreeClassifier()
    self.clf.fit(dense_training, self.trainlabels)
def tenfold_train(self,voting,classifiers = [],p = 10):
kf = cross_validation.KFold(len(self.training), n_folds=10)
training = deepcopy(self.training)
feat = deepcopy(self.features)
示例6: oc_classify
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
def oc_classify(X, Y):
    """Fit an error-correcting output-code classifier over LinearSVC.

    code_size is set to the number of nonzero unique values in Y.
    NOTE(review): count_nonzero drops a 0 label from the count — confirm the
    labels never include 0, or this undercounts the classes.
    """
    n_codes = np.count_nonzero(sp.unique(Y))
    model = OutputCodeClassifier(LinearSVC(), code_size=n_codes)
    return model.fit(X, Y)
示例7: range
# 需要导入模块: from sklearn.multiclass import OutputCodeClassifier [as 别名]
# 或者: from sklearn.multiclass.OutputCodeClassifier import fit [as 别名]
# NOTE(review): extraction stripped indentation and the `if` header that this
# fragment's `else:` belongs to — the visible lines pick a train/test split,
# then repeatedly fit an OutputCodeClassifier over gradient-boosted trees and
# tally accuracy plus a 6x6 confusion matrix. Written for Python 2 (prints).
y_train = labels[100:172,i]
X_test = sample2
y_test = labels[272:,i]
else:
X_train = training
y_train = labels[:172,i]
X_test = sampletest
y_test = labels[172:,i]
# box accumulates a 6x6 confusion count; accuracy holds per-repeat hit counts.
box = np.zeros([6,6])
accuracy = np.zeros(100)
for m in range(0,100):
# posterior is allocated but never filled here (its assignment is commented
# out below) — presumably left over from a predict_proba experiment.
posterior = np.empty([100,72,6])
gbc = GradientBoostingClassifier(n_estimators=60, max_depth=3)
occ = OutputCodeClassifier(gbc)
y_pred = occ.fit(X_train, y_train).predict(X_test)
n=0
# Count correct predictions; labels look 1-based (hence the -1 indexing into
# box) — TODO confirm against how `labels` is built above this excerpt.
for i in range(0,len(y_pred)):
if y_pred[i] == y_test[i]:
#print i, y_pred[i], y_test[i]
n = n+1
accuracy[m] = accuracy[m]+1
box[y_test[i]-1,y_pred[i]-1] = box[y_test[i]-1,y_pred[i]-1] + 1
#posterior[m] = knc.predict_proba(X_test)
# 0.72 converts a hit count out of 72 test items into a percentage.
print np.mean(accuracy)/0.72, np.std(accuracy)/0.72
#print sum(accuracy[0:8])/8.0, sum(accuracy[8:18])/10.0, sum(accuracy[18:30])/12.0, sum(accuracy[56:72])/16.0, sum(accuracy[30:43])/13.0, sum(accuracy[43:56])/13.0, sum(accuracy)/72.0
'''
means = np.empty([72,6])
stds = np.empty([72,6])
grid = np.empty([6,6])