本文整理汇总了Python中sklearn.svm.LinearSVC.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python LinearSVC.fit_transform方法的具体用法?Python LinearSVC.fit_transform怎么用?Python LinearSVC.fit_transform使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.svm.LinearSVC的用法示例。
在下文中一共展示了LinearSVC.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: feature_selection
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def feature_selection(input_file='train_out.csv',
                      limit_number=40000,
                      fs_pkl='feature_selection.pkl'):
    """Load labelled rows from a CSV file and keep only the selected features.

    Every CSV row is parsed as float feature values followed by an integer
    label in the last column.

    Args:
        input_file: Path of the comma-separated input file.
        limit_number: Stop reading after this many rows.
        fs_pkl: Path of a pickled, already fitted selector. When falsy, a new
            L1-penalised ``LinearSVC`` is fitted on the loaded rows and
            written to ``'feature_selection.pkl'``.

    Returns:
        The feature matrix reduced to the columns kept by the selector.
    """
    x = []
    y = []
    # The context manager closes the CSV file even if a row fails to parse.
    with open(input_file) as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        for count, data in enumerate(reader, 1):
            y.append(int(data[-1]))
            x.append([float(value) for value in data[:-1]])
            print(count)
            # Honour the caller's limit instead of a hard-coded 40000.
            if count == limit_number:
                break
    x = np.array(x)
    y = np.array(y)
    if not fs_pkl:
        fs = LinearSVC(C=0.01, penalty="l1", dual=False, verbose=2)
        x_new = fs.fit_transform(x, y)
        with open('feature_selection.pkl', 'wb') as pkl_out:
            pickle.dump(fs, pkl_out)
    else:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # Pickles are binary data, so the file is opened in 'rb' mode.
        with open(fs_pkl, 'rb') as pkl_in:
            fs = pickle.load(pkl_in)
        # Apply the stored selector; refitting it here would throw away the
        # fitted state that was just loaded.
        x_new = fs.transform(x)
    return x_new
示例2: binary_search
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def _fit_l1_svm(c, train_counts_tf, target_vals):
    """Fit an L1-penalised LinearSVC with penalty ``c``; return (reduced X, model)."""
    lsvm = LinearSVC(C=c, penalty="l1", dual=False)
    tc = lsvm.fit_transform(train_counts_tf, target_vals)
    return tc, lsvm


def binary_search(n, error, train_counts_tf, target_vals):
    """Search for a value of C whose L1 LinearSVC keeps about ``n`` features.

    Starting from C = 0.1, C is doubled (too few features) or halved (too
    many) until the feature count crosses ``n``; the bracket found that way
    is then bisected until the count is within ``error`` of ``n``.

    Args:
        n: Number of final features wanted.
        error: Tolerance on the number of features.
        train_counts_tf: tf-idf transformed training counts.
        target_vals: Target values of the training set.

    Returns:
        Tuple ``(tc, lsvm)``: the reduced training matrix and the fitted
        LinearSVC that produced it.
    """
    c = 0.1
    tc, lsvm = _fit_l1_svm(c, train_counts_tf, target_vals)
    features = tc.shape[1]
    if abs(features - n) < error:
        return tc, lsvm

    i = 0
    new_c = c
    if features < n:
        # Too few features: relax the regularisation by doubling C.
        while features < n:
            c = new_c
            new_c = new_c * 2
            print("c %f, new_c %f, iteration %d, features %d" % (c, new_c, i, features))
            tc, lsvm = _fit_l1_svm(new_c, train_counts_tf, target_vals)
            features = tc.shape[1]
            i += 1
    else:
        # Too many features: strengthen the regularisation by halving C.
        while features > n:
            c = new_c
            new_c = new_c / 2
            print("c %f, new_c %f, iteration %d, features %d" % (c, new_c, i, features))
            tc, lsvm = _fit_l1_svm(new_c, train_counts_tf, target_vals)
            features = tc.shape[1]
            i += 1

    # The last two C values bracket the target feature count.
    lower = min(c, new_c)
    upper = max(c, new_c)
    while abs(n - features) > error:
        middle = (upper + lower) / 2
        if middle == lower or middle == upper:
            # The bracket cannot shrink any further in floating point, so no
            # C yields a count within tolerance; stop instead of refitting
            # the same model forever.
            print("search interval collapsed at c %f with %d features" % (middle, features))
            break
        tc, lsvm = _fit_l1_svm(middle, train_counts_tf, target_vals)
        features = tc.shape[1]
        if features > n:
            upper = middle
        else:
            lower = middle
        print("lower %f, upper %f, iteration %d, features %d" % (lower, upper, i, features))
        i += 1
    return tc, lsvm
示例3: l1FeatureSelection
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def l1FeatureSelection():
    """Reduce the module-level training and test data with an L1 LinearSVC.

    Reads the globals ``trainingData``, ``testData`` and
    ``trainingDataLabels``, converts them to float arrays, fits the selector
    on the training data and applies it to both sets.

    Returns:
        Tuple ``(reduced training matrix, reduced test matrix)``.
    """
    train_matrix = np.array(trainingData, dtype=float)
    test_matrix = np.array(testData, dtype=float)
    train_labels = np.array(trainingDataLabels, dtype=float)

    selector = LinearSVC(C=0.01, penalty="l1", dual=False)
    reduced_train = selector.fit_transform(train_matrix, train_labels)
    reduced_test = selector.transform(test_matrix)
    return reduced_train, reduced_test
示例4: L1LinearSVC
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
class L1LinearSVC(LinearSVC):
    """LinearSVC that prunes features with an L1-penalised LinearSVC first."""

    def fit(self, X, y):
        """Fit the L1 selector on ``X``, then fit this classifier on the reduced data."""
        selector = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        self.transformer_ = selector
        reduced = selector.fit_transform(X, y)
        return LinearSVC.fit(self, reduced, y)

    def predict(self, X):
        """Reduce ``X`` with the fitted selector and predict on the result."""
        reduced = self.transformer_.transform(X)
        return LinearSVC.predict(self, reduced)
示例5: call_GridParamSearch_featfilt
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def call_GridParamSearch_featfilt(X, y):
    """Print the effect of several feature filters, then run a parameter search.

    (Currently just a cut & paste from "main".)
    Prints the shape of ``X`` after chi2 percentile selection, after L1
    LinearSVC selection, and after both chained, then calls
    ``GridParamSearch`` on the SVM-filtered matrix with the module-level
    ``Tree_param_dist`` and ``clf_EXT``.

    Args:
        X: Feature matrix.
        y: Target labels.
    """
    print("SPARSE (L1) EXT gridparam scores:")
    # clf = Pipeline([
    #     ('feature_selection', LinearSVC(penalty="l1", loss='l1',dual=False, class_weight='auto')),
    #     ('classification', ExtraTreesClassifier(n_jobs=3)
    #     )])

    # Sparse: L1-penalised feature selection prior to fitting/prediction.
    clf_svm = LinearSVC(penalty="l1", loss='l2', dual=False, class_weight='auto')

    # See http://scikit-learn.org/0.13/auto_examples/plot_feature_selection.html
    print('Original features matrix:')
    print(X.shape)

    # Univariate feature selection: keep the 20% highest-scoring features
    # according to the chi2 statistic.
    selector = SelectPercentile(chi2, percentile=20)
    X_anova = selector.fit_transform(X, y)
    print(
        'New (2 f_classif) Using statistical feature selection: features matrix is:')
    print(X_anova.shape)

    # lda = LDA(n_components=10)
    # X_lda = lda.fit_transform(X, y)
    # print('New LDA filtered features matrix:')
    # print(X_lda.shape)

    X_svm = clf_svm.fit_transform(X, y)  # sparse (L1) feature selection
    print('New sparse (SVM filtered) features matrix:')
    print(X_svm.shape)

    print("Res of SVM fitting of (F scores filtered =2) for more feature selection:")
    # This refits clf_svm on the chi2-filtered matrix; X_svm is already computed.
    X_doubleFilt_svm_f = clf_svm.fit_transform(X_anova, y)
    print(X_doubleFilt_svm_f.shape)

    print("param search on sparse features matrix")
    GridParamSearch(param_dist=Tree_param_dist, clf=clf_EXT, X=X_svm, y=y)
示例6: L1LinearSVC
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
class L1LinearSVC(LinearSVC):
    """LinearSVC subclass that selects features before fitting and predicting.

    ``fit`` trains an inner L1-penalised LinearSVC, stores it as
    ``transformer_`` and fits this classifier on the reduced matrix;
    ``predict`` applies the same reduction to new samples.
    """

    def fit(self, X, y):
        """Fit the inner L1 selector on ``(X, y)``, then this classifier on its output."""
        # Inner model with explicit penalty/dual/tol; other settings keep
        # their LinearSVC defaults.
        l1_selector = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        self.transformer_ = l1_selector
        X_selected = l1_selector.fit_transform(X, y)
        # The outer classifier keeps whatever parameters it was built with.
        return LinearSVC.fit(self, X_selected, y)

    def predict(self, X):
        """Predict labels for ``X`` after reducing it with the fitted selector."""
        X_selected = self.transformer_.transform(X)
        return LinearSVC.predict(self, X_selected)
示例7: L1LinearSVC
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
class L1LinearSVC(LinearSVC):
    """LinearSVC preceded by L1-based feature selection."""

    def fit(self, X, y):
        """Train the L1 feature selector, then train this classifier on the kept features."""
        # The smaller C, the stronger the regularization.
        # The more regularization, the more sparsity.
        feature_filter = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        self.transformer_ = feature_filter
        kept = feature_filter.fit_transform(X, y)
        return LinearSVC.fit(self, kept, y)

    def predict(self, X):
        """Filter ``X`` with the stored selector and return the predictions."""
        kept = self.transformer_.transform(X)
        return LinearSVC.predict(self, kept)
示例8: baseline_model
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def baseline_model(X_train, y_train, X_test, y_test):
    """Select features with an L1 LinearSVC, then classify with a second LinearSVC.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: Test labels (not used by this function).

    Returns:
        Predicted labels for ``X_test``.
    """
    selector = LinearSVC(C=10, penalty='l1', dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)
    print(train_reduced.shape)

    classifier = LinearSVC(C=1)
    classifier.fit(train_reduced, y_train)
    return classifier.predict(test_reduced)
示例9: getBestFeatsFromSvm
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def getBestFeatsFromSvm(kval):
    """Return, for each class, the indices of the ``kval`` largest SVM weights.

    Fits ``LinearSVC(C=0.1)`` on the module-level ``X`` and ``y`` and ranks
    the features of every row of ``coef_`` by weight.

    Args:
        kval: Number of top-weighted features to report per class.

    Returns:
        List with one index array per row of ``coef_``; each array is ordered
        from the largest weight downwards.
    """
    # One fit is enough: the earlier discarded fit and the unused
    # fit_transform output have been removed.
    clf = LinearSVC(C=0.1)
    clf.fit(X, y)
    # Iterate over all rows of coef_ rather than assuming exactly three
    # classes; a binary problem has a single row.
    return [numpy.argsort(weights)[-kval:][::-1] for weights in clf.coef_]
示例10: LinearSVC_custom
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def LinearSVC_custom(header, x_train, y_train, x_test, y_test, color):
    """Fit an L1 LinearSVC and report which features receive a non-zero weight.

    Args:
        header: Sequence of feature names, indexed like the columns of
            ``x_train``.
        x_train: Training features; ``.values`` is passed to the model.
        y_train: Training targets; ``.values`` is passed to the model.
        x_test: Test features used for scoring.
        y_test: Test targets used for scoring.
        color: Label used in the printed report (the model name).

    Returns:
        List of the ``header`` entries whose weight in ``coef_[0]`` is
        non-zero.
    """
    clf = LinearSVC(C=1, penalty="l1", dual=False)
    # A single fit is sufficient; the second fit via fit_transform and the
    # discarded predict() call did not affect the result.
    clf.fit(x_train.values, y_train.values)
    print("Goodness of fit using the LinearSVC is %f \n \n " % clf.score(x_test.values, y_test.values))

    # The L1 penalty drives the weights of unused features to exactly zero,
    # so the non-zero positions identify the features the model relies on.
    features = [header[index]
                for index, weight in enumerate(clf.coef_[0])
                if weight != 0]

    # Report the important features for the corresponding model (wine color).
    print("The important features for %s color are : %s \n \n " % (color, str(features).replace("'", '').replace("[", '').replace("]", '')))
    return features
示例11: baseline_model
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def baseline_model(X_train, y_train, X_test, y_test):
    """Pipeline of L1 feature selection, LMNN metric learning and k-NN.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: Test labels (not used by this function).

    Returns:
        Labels predicted by the k-NN classifier for ``X_test``.
    """
    # Dimension reduction.
    selector = LinearSVC(C=1, penalty="l1", dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)

    # Metric learning.
    metric = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    metric.fit(train_reduced, y_train)
    train_embedded = metric.transform(train_reduced)
    test_embedded = metric.transform(test_reduced)

    knn = KNeighborsClassifier(n_neighbors=4)
    knn.fit(train_embedded, y_train)
    # pickle.dump(metric, open('dist_metrics', 'w'))
    return knn.predict(test_embedded)
示例12: featureSelection
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def featureSelection(X_train, X_test, X_val, y_train, log, tech, C):
    """Reduce the train/test/validation matrices with the chosen technique.

    Args:
        X_train: Training matrix exposing ``todense()``; the selector is
            fitted on it.
        X_test: Test matrix exposing ``todense()``.
        X_val: Validation matrix exposing ``todense()``.
        y_train: Training labels (used only by the ``'LinearSVC'`` technique).
        log: When truthy, apply ``log(x + 1)`` to the reduced matrices.
        tech: ``'VarTh'`` for a variance threshold of 0.01, or ``'LinearSVC'``
            for L1-penalised LinearSVC selection.
        C: Penalty parameter for the ``'LinearSVC'`` technique.

    Returns:
        Tuple ``(X_train_new, X_test_new, X_val_new)``.

    Raises:
        ValueError: If ``tech`` is not one of the supported techniques.
    """
    # Fail early with a clear message instead of an UnboundLocalError at the
    # return statement.
    if tech not in ('VarTh', 'LinearSVC'):
        raise ValueError(
            "unsupported feature selection technique: %r "
            "(expected 'VarTh' or 'LinearSVC')" % (tech,))

    if tech == 'VarTh':
        selector = VarianceThreshold(threshold=0.01)
        X_train_new = selector.fit_transform(X_train.todense())
    else:
        selector = LinearSVC(C=C, penalty="l1", dual=False)
        X_train_new = selector.fit_transform(X_train.todense(), y_train)
    X_test_new = selector.transform(X_test.todense())
    X_val_new = selector.transform(X_val.todense())

    # The optional log scaling is identical for both techniques.
    if log:
        X_train_new = np.log(X_train_new + 1)
        X_test_new = np.log(X_test_new + 1)
        X_val_new = np.log(X_val_new + 1)
    return X_train_new, X_test_new, X_val_new
示例13: baseline_model
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
def baseline_model(X_train, y_train, X_test, y_test):
    """Select features with an L1 LinearSVC and classify with a random forest.

    Prints the shape of the training matrix before selection and the shapes
    of the training and test matrices after it.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_test: Test feature matrix.
        y_test: Test labels (not used by this function).

    Returns:
        Labels predicted by the random forest for ``X_test``.
    """
    print(X_train.shape)
    selector = LinearSVC(C=1, penalty="l1", dual=False)
    train_selected = selector.fit_transform(X_train, y_train)
    test_selected = selector.transform(X_test)
    print(train_selected.shape)
    print(test_selected.shape)

    forest_options = dict(
        n_estimators=300,
        criterion='gini',
        min_samples_split=8,
        min_samples_leaf=3,
        max_features='auto',
        max_leaf_nodes=4,
    )
    forest = RandomForestClassifier(**forest_options)
    forest.fit(train_selected, y_train)
    return forest.predict(test_selected)
示例14: WithoutDirty
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
class WithoutDirty(object):
    """Cross-validated text classification, with an optional cleaning step.

    ``getTrainedOnClean`` drops the least confidently classified 10% of the
    training samples of every label before the final fit; ``run`` currently
    trains through ``getTrained``.
    """

    def getData(self, filename):
        """Load an svmlight file and return its shuffled ``(X, y)`` pair."""
        features, targets = load_svmlight_file(filename)
        features, targets = shuffle(features, targets)
        return (features, targets)

    def getTextData(self):
        """Return the shuffled data stored in the lower-cased text feature file."""
        return self.getData('../feature_set/lowertext.scale')

    def getTrained(self, X, y):
        """Fit and return an L2 logistic regression on ``(X, y)``."""
        model = LogisticRegression(penalty='l2', tol=1e-6, C=1e-1)
        model.fit(X, y)
        return model

    def getTrainedOnClean(self, X, y):
        """Fit a classifier after discarding low-confidence training samples.

        An L1 LinearSVC (kept as ``self.fs``) reduces ``X``; a logistic
        regression fitted on the reduced data scores every sample; for each
        of the labels 0-3 the most confident 90% of samples are kept, and
        the classifier is refitted on those rows of the original ``X``.
        """
        # Perform feature selection.
        self.fs = LinearSVC(penalty='l1', dual=False, tol=1e-4,
                            C=1e1, multi_class='ovr', fit_intercept=True)
        X_reduced = self.fs.fit_transform(X, y)
        print('%s %s' % ('feature dimension:', X_reduced.shape))

        # Train a classifier.
        # clf = LinearSVC(penalty='l2', loss='l2', dual=True, tol=1e-4,
        #                 C=1e-1, multi_class='ovr', fit_intercept=True)
        clf = LogisticRegression(penalty='l2', tol=1e-6, C=1e0)
        clf.fit(X_reduced, y)

        # Discard examples with low confidence.
        pre_scores = clf.predict_proba(X_reduced)
        max_confi = pre_scores.max(axis=1).tolist()
        # One bucket of (sample index, confidence) pairs per label 0..3.
        per_label = [[], [], [], []]
        for idx, conf in enumerate(max_confi):
            per_label[int(y[idx])].append((idx, conf))

        chosen_indices = []
        for bucket in per_label:
            ranked = sorted(bucket,
                            key=operator.itemgetter(1),
                            reverse=True)
            ordered = [pair[0] for pair in ranked]
            keep = int(len(ordered) * .9)
            chosen_indices.extend(ordered[:keep])

        X_clean = X[chosen_indices]
        y_clean = y[chosen_indices]
        print('%s %s' % ('cleaned feature dimension', X_clean.shape))
        print(Counter(y_clean).most_common())

        # Train the classifier again with clean data.
        clf.fit(X_clean, y_clean)
        return clf

    def getPredicted(self, clf, X):
        """Return ``clf.predict(X)``; the feature selector is not applied."""
        # X = self.fs.transform(X)
        return clf.predict(X)

    def run(self):
        """Run 5-fold cross-validation and print accuracy and confusion matrix."""
        X, y = self.getTextData()
        kfold = cross_validation.KFold(X.shape[0], k=5)
        tester = tests.tester(4)
        for train, test in kfold:
            # In the training stage, we should discard the part of the
            # training data after the feature selection.
            clf = self.getTrained(X[train], y[train])
            predicted = self.getPredicted(clf, X[test])
            tester.record(y[test], predicted)
        # NOTE(review): indentation was lost in the source; the summary is
        # assumed to be printed once after all folds - confirm.
        print('%s %s' % ('accuracy:', tester.accuracy()))
        print('confusion matrix:')
        print(tester.confusionMatrix())
示例15: LinearSVC
# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import fit_transform [as 别名]
# Script: extend the count features with TF-IDF columns, select features with
# an L1-penalised LinearSVC, encode the labels and build the tree ensemble.
# NOTE(review): MODEL_NAME mentions a random forest, but the classifier built
# below is an ExtraTreesClassifier - confirm the name is intentional.
MODEL_NAME = 'model_16_random_forest_calibrated_feature_selection'
MODE = 'cv' # run mode, one of: cv|submission|holdout
# import data (the last two values returned by load_data are ignored)
train, labels, test, _, _ = utils.load_data()
# transform counts to TFIDF features; the TF-IDF columns are appended to the
# original columns (axis=1), so both representations are kept side by side
tfidf = feature_extraction.text.TfidfTransformer(smooth_idf=False)
train = np.append(train, tfidf.fit_transform(train).toarray(), axis=1)
test = np.append(test, tfidf.transform(test).toarray(), axis=1)
# feature selection: fit the L1 LinearSVC on the training data only, then
# apply the same selection to the test data
feat_selector = LinearSVC(C=0.095, penalty='l1', dual=False)
train = feat_selector.fit_transform(train, labels)
test = feat_selector.transform(test)
print train.shape
# encode labels
lbl_enc = preprocessing.LabelEncoder()
labels = lbl_enc.fit_transform(labels)
# train classifier (only constructed here; fitting is not part of this excerpt)
clf = ensemble.ExtraTreesClassifier(n_jobs=3, n_estimators=600, max_features=20, min_samples_split=3,
bootstrap=False, verbose=3, random_state=23)
if MODE == 'cv':
开发者ID:ShrikanthRamanathan,项目名称:kaggle_otto,代码行数:32,代码来源:random_forest_calibrated_feature_selection.py