当前位置: 首页>>代码示例>>Python>>正文


Python LinearSVC.fit_transform方法代码示例

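本文整理汇总了Python中sklearn.svm.LinearSVC.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python LinearSVC.fit_transform方法的具体用法?Python LinearSVC.fit_transform怎么用?Python LinearSVC.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.svm.LinearSVC的用法示例。注意: LinearSVC.fit_transform 自 scikit-learn 0.17 起已被弃用, 并在 0.19 中移除; 新代码请改用 sklearn.feature_selection.SelectFromModel 进行基于 L1 的特征选择。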


在下文中一共展示了LinearSVC.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: feature_selection

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def feature_selection(input_file='train_out.csv',
                      limit_number=40000,
                      fs_pkl='feature_selection.pkl'):
    """Load rows from a CSV file and reduce them with an L1 LinearSVC selector.

    Every row is parsed as float features followed by an integer label in
    the last column.  The running row count is printed after each row.

    Args:
        input_file: Path of the comma-separated input file.
        limit_number: Maximum number of rows to read; ``None`` reads the
            whole file.
        fs_pkl: Path of a pickled, already fitted selector.  When falsy, a
            new selector is fitted on the loaded rows and pickled to
            ``'feature_selection.pkl'``.

    Returns:
        The feature matrix after the selector has been applied.
    """
    x = []
    y = []
    with open(input_file) as handle:
        reader = csv.reader(handle, delimiter=',')
        for count, data in enumerate(reader, 1):
            yi = int(data[-1])
            # A list (not ``map``) so the rows stack into a 2-D array on
            # Python 3 as well.
            xi = [float(value) for value in data[:-1]]
            x.append(xi)
            y.append(yi)
            print(count)
            if limit_number is not None and count >= limit_number:
                break

    x = np.array(x)
    y = np.array(y)
    if not fs_pkl:
        fs = LinearSVC(C=0.01, penalty="l1", dual=False, verbose=2)
        x_new = fs.fit_transform(x, y)
        with open('feature_selection.pkl', 'wb') as handle:
            pickle.dump(fs, handle)
    else:
        # SECURITY NOTE: unpickling runs arbitrary code; only load selector
        # files that come from a trusted source.
        with open(fs_pkl, 'rb') as handle:
            fs = pickle.load(handle)
        # The stored selector is assumed to be fitted already (this function
        # only pickles after fitting), so apply it instead of refitting.
        x_new = fs.transform(x)

    return x_new
开发者ID:jimmy9988,项目名称:MDST_SpringLeaf,代码行数:33,代码来源:feature_generator.py

示例2: binary_search

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def binary_search(n, error, train_counts_tf, target_vals, max_iter=100):
    '''
    Search for a LinearSVC regularisation value C (L1 penalty) whose selected
    feature count is within `error` of `n`.

    The search starts at C = 0.1, doubles or halves C until the feature count
    crosses `n`, then bisects between the last two C values.  It assumes the
    feature count grows with C.

    n               --  number of final features
    error           --  error within which to get the number of features
    train_counts_tf --  tf-idf transformed training counts
    target_vals     --  target values in the training set
    max_iter        --  upper bound on the number of refits after the first
                        one; prevents an endless loop when no C reaches the
                        target (the last fit is returned in that case)

    returns decreased/transformed train counts and Lin. SVM classifier
    '''
    def fit(c_value):
        # Fit an L1-penalised LinearSVC and report how many columns survive.
        model = LinearSVC(C=c_value, penalty="l1", dual=False)
        reduced = model.fit_transform(train_counts_tf, target_vals)
        return reduced, model, reduced.shape[1]

    c = 0.1
    tc, lsvm, features = fit(c)
    if abs(features - n) < error:
        return tc, lsvm

    i = 0
    new_c = c
    # Phase 1: bracket the target by repeatedly doubling or halving C.
    if features < n:
        while features < n and i < max_iter:
            c = new_c
            new_c = new_c * 2
            print("c %f, new_c %f, iteration %d, features %d" % (c, new_c, i, features))
            tc, lsvm, features = fit(new_c)
            i += 1
    else:
        while features > n and i < max_iter:
            c = new_c
            new_c = new_c / 2
            print("c %f, new_c %f, iteration %d, features %d" % (c, new_c, i, features))
            tc, lsvm, features = fit(new_c)
            i += 1

    # Phase 2: bisect between the last two C values tried.
    if new_c > c:
        upper = new_c
        lower = c
    else:
        upper = c
        lower = new_c
    while abs(n - features) > error and i < max_iter:
        middle = (upper + lower) / 2
        tc, lsvm, features = fit(middle)
        if features > n:
            upper = middle
        else:
            lower = middle
        print("lower %f, upper %f, iteration %d, features %d" % (lower, upper, i, features))
        i += 1

    if abs(n - features) > error:
        print("max_iter %d reached, returning last fit with %d features" % (max_iter, features))
    return tc, lsvm
开发者ID:joshboon,项目名称:okstereotype,代码行数:57,代码来源:pick_best_model_v1.py

示例3: l1FeatureSelection

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def l1FeatureSelection():
    """Reduce the module-level training and test data with an L1 LinearSVC.

    Reads ``trainingData``, ``testData`` and ``trainingDataLabels`` from the
    enclosing module, converts them to float arrays, fits the selector on the
    training part and applies the same selection to the test part.

    Returns:
        A ``(reduced_training, reduced_test)`` tuple.
    """
    train_matrix = np.array(trainingData, dtype=float)
    test_matrix = np.array(testData, dtype=float)
    train_labels = np.array(trainingDataLabels, dtype=float)

    selector = LinearSVC(C=0.01, penalty="l1", dual=False)
    reduced_train = selector.fit_transform(train_matrix, train_labels)
    reduced_test = selector.transform(test_matrix)
    return (reduced_train, reduced_test)
开发者ID:quentinperrot,项目名称:stayalert,代码行数:10,代码来源:testing.py

示例4: L1LinearSVC

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
class L1LinearSVC(LinearSVC):
    """LinearSVC that prunes features with an L1-penalised LinearSVC first."""

    def fit(self, X, y):
        """Fit the L1 selector on ``(X, y)``, then fit this model on the reduced X.

        The fitted selector is kept on ``self.transformer_`` for ``predict``.
        """
        selector = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        self.transformer_ = selector
        reduced = selector.fit_transform(X, y)
        return LinearSVC.fit(self, reduced, y)

    def predict(self, X):
        """Apply the stored selector to ``X`` and predict on the reduced matrix."""
        reduced = self.transformer_.transform(X)
        return LinearSVC.predict(self, reduced)
开发者ID:kumarishan,项目名称:python-ml-tryout,代码行数:11,代码来源:doc_classification_newsgroup.py

示例5: call_GridParamSearch_featfilt

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def call_GridParamSearch_featfilt(X, y) :
    '''
    Report the effect of several feature filters on X, then run
    GridParamSearch on the L1-SVM filtered matrix.

    Steps, each printing the resulting matrix shape:
      1. chi2 univariate selection keeping the top 20 percent of features;
      2. L1-penalised LinearSVC selection on the original X;
      3. the same LinearSVC refitted on the chi2-filtered matrix.
    Only the matrix from step 2 is handed to GridParamSearch, together with
    the module-level Tree_param_dist and clf_EXT.

    X -- feature matrix
    y -- target labels
    '''
    print("SPARSE (L1) EXT gridparam scores:")
    # Sparse, L1-penalised feature selection performed before the tree
    # model is fitted.
    clf_svm = LinearSVC(penalty="l1", loss='l2', dual=False, class_weight='auto')

    # Based on http://scikit-learn.org/0.13/auto_examples/plot_feature_selection.html
    print('Original features matrix:')
    print(X.shape)
    # Univariate feature selection: keep the 20% best-scoring features.
    # NOTE(review): the message printed below mentions f_classif, but the
    # scoring function actually used here is chi2.
    selector = SelectPercentile(chi2, percentile=20)
    X_anova = selector.fit_transform(X, y)
    print(
        'New (2 f_classif) Using statistical feature selection: features matrix is:')
    print(X_anova.shape)

    # Sparse feature selection on the full matrix.
    X_svm = clf_svm.fit_transform(X, y)
    print('New sparse (SVM filtered) features matrix:')
    print(X_svm.shape)

    # The same estimator is refitted on the chi2-filtered matrix; this
    # result is only reported, not searched over.
    print("Res of SVM fitting of (F scores filtered =2) for more feature selection:")
    X_doubleFilt_svm_f = clf_svm.fit_transform(X_anova, y)
    print(X_doubleFilt_svm_f.shape)
    print("param search on sparse features matrix")
    GridParamSearch(param_dist=Tree_param_dist, clf=clf_EXT, X=X_svm, y=y)
开发者ID:MichaelDoron,项目名称:ProFET,代码行数:44,代码来源:Model_Parameters_CV.py

示例6: L1LinearSVC

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
class L1LinearSVC(LinearSVC):
    """LinearSVC preceded by an L1-penalised LinearSVC feature selector.

    ``fit`` trains a separate selector, stores it on ``self.transformer_``
    and trains this classifier on the columns the selector keeps;
    ``predict`` applies the stored selector before predicting.
    """

    def fit(self, X, y):
        """Train the selector and then this classifier on the reduced data."""
        # The selector is an independent estimator with its own settings;
        # this classifier keeps whatever parameters it was constructed with.
        self.transformer_ = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        selected = self.transformer_.fit_transform(X, y)
        return LinearSVC.fit(self, selected, y)

    def predict(self, X):
        """Return predictions for ``X`` after applying the stored selector."""
        selected = self.transformer_.transform(X)
        return LinearSVC.predict(self, selected)
开发者ID:kebaler,项目名称:DAT_SF_5,代码行数:13,代码来源:document_classification_20newsgroups_group3.py

示例7: L1LinearSVC

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
class L1LinearSVC(LinearSVC):
    """LinearSVC trained on features chosen by an L1-penalised LinearSVC."""

    def fit(self, X, y):
        """Select features with an L1 model, then fit on the selected columns.

        A smaller C gives stronger regularisation and therefore a sparser
        selection; the selector below uses the default C.
        """
        l1_selector = LinearSVC(penalty="l1", dual=False, tol=1e-3)
        self.transformer_ = l1_selector
        X_selected = l1_selector.fit_transform(X, y)
        return LinearSVC.fit(self, X_selected, y)

    def predict(self, X):
        """Predict on ``X`` restricted to the features chosen during ``fit``."""
        X_selected = self.transformer_.transform(X)
        return LinearSVC.predict(self, X_selected)
开发者ID:PhenixI,项目名称:scikit_learn_Code,代码行数:13,代码来源:text_classification.py

示例8: baseline_model

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def baseline_model(X_train,y_train,X_test,y_test):
    """Select features with an L1 LinearSVC, then classify with a LinearSVC.

    The selector (C=10) is fitted on the training data and applied to both
    sets; the shape of the reduced training matrix is printed.  ``y_test``
    is accepted but not used.

    Returns:
        Predictions of the C=1 LinearSVC for the reduced test set.
    """
    selector = LinearSVC(C=10, penalty='l1', dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)
    print(train_reduced.shape)

    classifier = LinearSVC(C=1)
    classifier.fit(train_reduced, y_train)
    return classifier.predict(test_reduced)
开发者ID:LEONOB2014,项目名称:StockPatternRecognition,代码行数:16,代码来源:SVM.py

示例9: getBestFeatsFromSvm

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def getBestFeatsFromSvm(kval):
    """Return the indices of the highest-weighted features per coefficient row.

    A ``LinearSVC(C=0.1)`` is fitted on the module-level ``X`` and ``y``.
    For every row of ``clf.coef_`` the ``kval`` feature indices with the
    largest coefficients are collected, largest first.

    Args:
        kval: Number of feature indices to keep per row; values <= 0 give
            empty results.

    Returns:
        A list with one index array per row of ``clf.coef_``.
    """
    clf = LinearSVC(C=0.1)
    clf.fit(X, y)

    bestFeats = []
    # Iterate over every coefficient row rather than a fixed count of three,
    # so binary and many-class problems work as well.
    for weights in clf.coef_:
        ascending = numpy.argsort(weights)
        if kval > 0:
            top = ascending[-kval:][::-1]
        else:
            # ``ascending[-0:]`` would return every index, not none.
            top = ascending[:0]
        bestFeats.append(top)
    return bestFeats
开发者ID:jagatsastry,项目名称:blogger-age-attribution,代码行数:19,代码来源:classify_svm_coef.py

示例10: LinearSVC_custom

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def LinearSVC_custom(header,x_train, y_train,x_test,y_test,color):
    """Fit an L1 LinearSVC and report which features keep a nonzero weight.

    The model is fitted once on the training data; its score on the test
    data and the retained feature names are printed.

    Args:
        header: Sequence of feature names, indexed like the training columns.
        x_train, y_train: Training data; their ``.values`` are used.
        x_test, y_test: Test data; their ``.values`` are used.
        color: Label inserted into the printed summary.

    Returns:
        The names from ``header`` whose coefficient in ``clf.coef_[0]`` is
        nonzero.  Only the first coefficient row is inspected.
    """
    clf = LinearSVC(C=1, penalty="l1", dual=False)
    clf.fit(x_train.values, y_train.values)
    print("Goodness of fit using the LinearSVC is %f \n \n  " % clf.score(x_test.values, y_test.values))

    # The L1 penalty drives the coefficients of discarded features to
    # exactly zero, so the nonzero positions identify the features kept.
    features = [header[index]
                for index, weight in enumerate(clf.coef_[0])
                if weight != 0]
    print("The important features for %s color are : %s  \n \n " % (color, str(features).replace("'",'').replace("[",'').replace("]",'')))
    return features
开发者ID:ekta1007,项目名称:Predicting_wine_quality,代码行数:22,代码来源:wine_model_final.py

示例11: baseline_model

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def baseline_model(X_train,y_train,X_test,y_test):
    """Classify with k-NN after L1 feature selection and LMNN metric learning.

    Pipeline: an L1 LinearSVC selector reduces both sets, an LMNN model
    fitted on the reduced training data transforms both sets, and a
    4-neighbour classifier predicts the test labels.  ``y_test`` is accepted
    but not used.

    Returns:
        Predicted labels for the test set.
    """
    # Dimension reduction.
    selector = LinearSVC(C=1, penalty="l1", dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)

    # Metric learning.
    metric = LMNN(k=4, min_iter=50, max_iter=1000, learn_rate=1e-7)
    metric.fit(train_reduced, y_train)
    train_embedded = metric.transform(train_reduced)
    test_embedded = metric.transform(test_reduced)

    # Nearest-neighbour classification in the learned space.
    knn = KNeighborsClassifier(n_neighbors=4)
    knn.fit(train_embedded, y_train)
    return knn.predict(test_embedded)
开发者ID:LEONOB2014,项目名称:StockPatternRecognition,代码行数:22,代码来源:metrics_learning.py

示例12: featureSelection

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def featureSelection(X_train,X_test,X_val,y_train,log,tech,C):
    """Apply one feature-selection technique to train, test and validation sets.

    Args:
        X_train, X_test, X_val: Matrices exposing ``todense()``.
        y_train: Training labels; only used by the ``'LinearSVC'`` technique.
        log: When truthy, ``np.log(value + 1)`` is applied to every result.
        tech: ``'VarTh'`` (variance threshold of 0.01) or ``'LinearSVC'``
            (L1-penalised LinearSVC).
        C: Regularisation value for the ``'LinearSVC'`` technique.

    Returns:
        ``(X_train_new, X_test_new, X_val_new)`` after selection.

    Raises:
        ValueError: If ``tech`` is not one of the supported names.
    """
    if tech == 'VarTh':
        selector = VarianceThreshold(threshold=0.01)
        X_train_new = selector.fit_transform(X_train.todense())
    elif tech == 'LinearSVC':
        selector = LinearSVC(C=C, penalty="l1", dual=False)
        X_train_new = selector.fit_transform(X_train.todense(), y_train)
    else:
        # Previously an unknown name fell through to an UnboundLocalError
        # at the return statement.
        raise ValueError("unknown feature selection technique: %r" % (tech,))

    # The selector fitted on the training data is reused for the other sets.
    X_test_new = selector.transform(X_test.todense())
    X_val_new = selector.transform(X_val.todense())

    if log:
        X_train_new = np.log(X_train_new+1)
        X_test_new = np.log(X_test_new+1)
        X_val_new = np.log(X_val_new+1)
    return X_train_new, X_test_new , X_val_new
开发者ID:sahuvaibhav,项目名称:Capstone,代码行数:23,代码来源:EntropyFusion.py

示例13: baseline_model

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
def baseline_model(X_train,y_train,X_test,y_test):
    """Select features with an L1 LinearSVC, then classify with a random forest.

    Prints the shape of the raw training matrix and of both reduced
    matrices.  ``y_test`` is accepted but not used.

    Returns:
        Random-forest predictions for the reduced test set.
    """
    print(X_train.shape)

    selector = LinearSVC(C=1, penalty="l1", dual=False)
    train_reduced = selector.fit_transform(X_train, y_train)
    test_reduced = selector.transform(X_test)

    print(train_reduced.shape)
    print(test_reduced.shape)

    forest = RandomForestClassifier(
        n_estimators=300,
        criterion='gini',
        min_samples_split=8,
        min_samples_leaf=3,
        max_features='auto',
        max_leaf_nodes=4,
    )
    forest.fit(train_reduced, y_train)
    return forest.predict(test_reduced)
开发者ID:LEONOB2014,项目名称:StockPatternRecognition,代码行数:23,代码来源:RF.py

示例14: WithoutDirty

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)
class WithoutDirty(object):
    """Text-classification experiment with optional removal of "dirty" examples.

    ``run`` cross-validates a plain logistic regression.
    ``getTrainedOnClean`` is an alternative trainer that drops the least
    confident training examples of every label before the final fit.
    """

    def getData(self, filename):
        """Load an svmlight-format file and return its shuffled ``(X, y)``."""
        X, y = load_svmlight_file(filename)
        X, y = shuffle(X, y)

        return (X, y)

    def getTextData(self):
        """Return the shuffled data from the lower-cased text feature file."""
        return self.getData('../feature_set/lowertext.scale')

    def getTrained(self, X, y):
        """Fit and return an L2 logistic regression (C=0.1) on ``(X, y)``."""
        clf = LogisticRegression(penalty='l2', tol=1e-6, C=1e-1)
        clf.fit(X, y)

        return clf

    def getTrainedOnClean(self, X, y):
        """Train a classifier after discarding low-confidence examples.

        An L1 LinearSVC selector (kept on ``self.fs``) reduces ``X``; a
        logistic regression fitted on the reduced data scores every example.
        Per label, the 90% of examples with the highest top-class
        probability are kept, and the classifier is refitted on those rows
        of the original, unreduced ``X``.

        Returns:
            The logistic regression refitted on the retained examples.
        """
        # Perform feature selection.
        self.fs = LinearSVC(penalty='l1', dual=False, tol=1e-4,
                C=1e1, multi_class='ovr', fit_intercept=True)
        X_reduced = self.fs.fit_transform(X, y)

        print('feature dimension: %s' % (X_reduced.shape,))

        # Train a classifier on the reduced features.
        clf = LogisticRegression(penalty='l2', tol=1e-6, C=1e0)
        clf.fit(X_reduced, y)

        # Confidence of an example = its highest class probability.
        pre_scores = clf.predict_proba(X_reduced)
        max_confi = pre_scores.max(axis = 1).tolist()

        # Group (index, confidence) pairs by integer label.  A dict is used
        # instead of a fixed four-slot list so any label set is handled.
        by_label = {}
        for idx, conf in enumerate(max_confi):
            by_label.setdefault(int(y[idx]), []).append((idx, conf))

        chosen_indices = []
        for label in sorted(by_label):
            ranked = sorted(by_label[label],
                    key = operator.itemgetter(1),
                    reverse = True)
            keep = int(len(ranked) * .9)
            chosen_indices.extend(idx for idx, _ in ranked[:keep])

        X_clean = X[chosen_indices]
        y_clean = y[chosen_indices]

        print('cleaned feature dimension %s' % (X_clean.shape,))
        print(Counter(y_clean).most_common())

        # Train the classifier again with the clean data.  X_clean holds the
        # full feature set, matching getPredicted, which does not apply
        # self.fs either.
        clf.fit(X_clean, y_clean)

        return clf

    def getPredicted(self, clf, X):
        """Return ``clf.predict(X)``; no feature selection is applied."""
        predicted = clf.predict(X)

        return predicted

    def run(self):
        """Run 5-fold cross-validation and print accuracy and confusion matrix."""
        X, y = self.getTextData()

        kfold = cross_validation.KFold(X.shape[0], k = 5)
        tester = tests.tester(4)

        for train, test in kfold:
            # In the training stage, we should discard the part of the
            # training data after the feature selection.
            clf = self.getTrained(X[train], y[train])

            predicted = self.getPredicted(clf, X[test])
            tester.record(y[test], predicted)

        print('accuracy: %s' % (tester.accuracy(),))
        print('confusion matrix:')
        print(tester.confusionMatrix())
开发者ID:pyongjoo,项目名称:twitter-research,代码行数:89,代码来源:ml_discardDirty.py

示例15: LinearSVC

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 注意: fit_transform 是 LinearSVC 实例的方法, 不能用 import 单独导入; 请先创建 LinearSVC 对象, 再调用 .fit_transform(X, y)

# Script: build TF-IDF augmented features, select a subset with an L1
# LinearSVC, encode the labels and configure an extra-trees classifier.
# NOTE(review): MODEL_NAME mentions a random forest, but the estimator
# constructed below is an ExtraTreesClassifier -- confirm the naming.
MODEL_NAME = 'model_16_random_forest_calibrated_feature_selection'
MODE = 'cv'  # cv|submission|holdout

# import data (the last two values returned by load_data are ignored)
train, labels, test, _, _ = utils.load_data()

# transform counts to TFIDF features; the TF-IDF columns are appended to the
# existing columns (axis=1), so both representations are kept side by side
tfidf = feature_extraction.text.TfidfTransformer(smooth_idf=False)
train = np.append(train, tfidf.fit_transform(train).toarray(), axis=1)
test = np.append(test, tfidf.transform(test).toarray(), axis=1)

# feature selection: fit the L1 selector on the training data only, then
# apply the same column selection to the test data
feat_selector = LinearSVC(C=0.095, penalty='l1', dual=False)
train = feat_selector.fit_transform(train, labels)
test = feat_selector.transform(test)

# report the training matrix shape after selection
print train.shape

# encode labels (done after feature selection, which used the raw labels)
lbl_enc = preprocessing.LabelEncoder()
labels = lbl_enc.fit_transform(labels)



# configure the classifier; it is only constructed here, not fitted
clf = ensemble.ExtraTreesClassifier(n_jobs=3, n_estimators=600, max_features=20, min_samples_split=3,
                                    bootstrap=False, verbose=3, random_state=23)

if MODE == 'cv':
开发者ID:ShrikanthRamanathan,项目名称:kaggle_otto,代码行数:32,代码来源:random_forest_calibrated_feature_selection.py


注:本文中的sklearn.svm.LinearSVC.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。