当前位置: 首页>>代码示例>>Python>>正文


Python RidgeClassifier.fit方法代码示例

本文整理汇总了Python中sklearn.linear_model.RidgeClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python RidgeClassifier.fit方法的具体用法?Python RidgeClassifier.fit怎么用?Python RidgeClassifier.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.linear_model.RidgeClassifier的用法示例。


在下文中一共展示了RidgeClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: retrain_models

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def retrain_models(username):
    """Retrain and persist the body and header classifiers for one user.

    The training data comes from ``model_retriever.retrieve_data_db``.  The
    shared ``train_x``/``train_y`` samples are appended to both the
    body-specific and the header-specific samples.  Body features are
    vectorized with TF-IDF and fitted with ``LinearSVC``; header features are
    vectorized with ``DictVectorizer`` and fitted with ``RidgeClassifier``.
    Both vectorizers and both models are handed to ``store_models``.

    Args:
        username: identifier passed to the data retriever and to
            ``store_models``.  It is concatenated into the progress message,
            so it must be a string.
    """
    train_x, train_y, body_x, body_y, head_x, head_y = (
        model_retriever.retrieve_data_db(username))

    # Body model input: body-only samples followed by the shared samples.
    b_train_x = [extract_body_features(msg) for msg in (body_x + train_x)]
    b_train_y = numpy.concatenate([body_y, train_y])

    body_vec = TfidfVectorizer(norm="l2")
    b_train_x = body_vec.fit_transform(b_train_x)

    # Header model input: header-only samples followed by the shared samples.
    h_train_x = [extract_header_features(msg) for msg in (head_x + train_x)]
    h_train_y = numpy.concatenate([head_y, train_y])

    head_vec = DictVectorizer()
    h_train_x = head_vec.fit_transform(h_train_x)

    # NOTE(review): loss='l2' is the legacy spelling of 'squared_hinge';
    # confirm it is still accepted by the scikit-learn version in use.
    body_model = LinearSVC(loss='l2', penalty="l2", dual=False, tol=1e-3)
    head_model = RidgeClassifier(tol=1e-2, solver="lsqr")

    body_model.fit(b_train_x, b_train_y)
    head_model.fit(h_train_x, h_train_y)

    # This line used to be indented with spaces inside a tab-indented body,
    # which Python 3 rejects with a TabError.
    print("Finished training models for "+username+"...")

    store_models(username, body_vec, body_model, head_vec, head_model)
开发者ID:dylanrhodes,项目名称:sigma,代码行数:32,代码来源:offline_updater.py

示例2: train_and_predict_m8

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def train_and_predict_m8(train, test, labels):
    """Fit a ridge classifier on TF-IDF features and predict the test set.

    The raw inputs are first cleaned by ``stemmer_clean`` (called with the
    'porter' stemmer type), then vectorized with a word-level TF-IDF model
    using 1- to 5-grams, sub-linear term frequency and the project stop-word
    list.  When the module-level ``gridSearch`` flag is truthy the model is
    chosen by ``perform_grid_search``; otherwise the classifier is fitted
    directly on the training matrix.

    Args:
        train: training data forwarded to ``stemmer_clean``.
        test: test data forwarded to ``stemmer_clean``.
        labels: training targets.

    Returns:
        The predictions for the vectorized test data.
    """
    train_docs, test_docs = stemmer_clean(
        train, test, stemmerEnableM7, stemmer_type='porter')

    # The vocabulary and IDF weights are learned from the training text only.
    vectorizer = TfidfVectorizer(
        min_df=5,
        max_features=None,
        strip_accents='unicode',
        analyzer='word',
        token_pattern=r'\w{1,}',
        ngram_range=(1, 5),
        smooth_idf=1,
        sublinear_tf=1,
        stop_words=ML_STOP_WORDS,
    )
    vectorizer.fit(train_docs)
    X = vectorizer.transform(train_docs)
    X_test = vectorizer.transform(test_docs)

    print ("Fitting Ridge Classifer...")
    ridge = RidgeClassifier(class_weight='auto', alpha=1, normalize=True)

    # Without grid search: fit the default classifier and predict.
    if not gridSearch:
        ridge.fit(X, labels)
        return ridge.predict(X_test)

    # With grid search: candidate values explored by perform_grid_search
    # (presumably scored with quadratic weighted kappa -- confirm in helper).
    search_space = {
        'alpha': [0.1, 0.3, 1, 3, 10],
        'normalize': [True, False],
    }
    best_model = perform_grid_search(ridge, search_space, X, labels)
    return best_model.predict(X_test)
开发者ID:sathishrvijay,项目名称:Kaggle-CrowdFlowerSRR,代码行数:27,代码来源:classifier.py

示例3: run

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def run(input_train, input_test, output_name):
    """Train a ridge classifier and write test item IDs ranked by score.

    Features are the one-hot encoded ``subcategory`` column stacked with
    uni-/bi-gram counts of the ``description`` column.  The count vocabulary
    is fitted on the training and test descriptions together.  The output
    file has an ``id`` header followed by one item ID per line, ordered from
    the highest to the lowest classifier score.

    Args:
        input_train: full path of the training file (tab-separated).
        input_test: full path of the testing file (tab-separated).
        output_name: full path of the output file.
    """
    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked

    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    pretestdummies = pd.get_dummies(test.subcategory)
    # NOTE(review): these two subcategories are dropped from the test
    # dummies, presumably because they do not occur in the training file --
    # confirm against the data.
    testdummies = sparse.csc_matrix(
        pretestdummies.drop(['Растения', 'Товары для компьютера'], axis=1))

    words = np.array(data.description, str)
    testwords = np.array(test.description, str)
    del data, test  # release the data frames before building the matrices

    vect = text.CountVectorizer(decode_error=u'ignore',
                                strip_accents='unicode',
                                ngram_range=(1, 2))
    vect.fit(np.concatenate((words, testwords)))

    features = sparse.hstack((dummies, vect.transform(words)))
    clf = RidgeClassifier()
    clf.fit(features, response)

    testFeatures = sparse.hstack((testdummies, vect.transform(testwords)))
    # RidgeClassifier does not implement predict_proba; decision_function
    # returns the signed confidence score, which is enough for ranking.
    # assumes is_blocked is binary, so the result is one score per row
    predicted_scores = clf.decision_function(testFeatures)

    ranked = sorted(zip(predicted_scores, testItemIds), reverse=True)
    with open(output_name, 'w') as f:
        f.write("id\n")
        for _score, item_id in ranked:
            f.write("%d\n" % (item_id))
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:37,代码来源:ridge_wordbag.py

示例4: validate

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def validate(input_train, rows=True, test=0.25):
    """Train on a split of one file and report hold-out metrics.

    Features are the one-hot encoded ``subcategory`` column stacked with
    uni-/bi-gram counts of the ``description`` column.  The data is split
    with ``train_test_split`` and a default ``RidgeClassifier`` is fitted on
    the training part.

    Args:
        input_train: full path of the tab-separated file to load.
        rows: ``True`` loads every row; an int loads that many rows.
        test: proportion of the dataset held out for testing.

    Returns:
        A 3-tuple ``(classification_report, average_precision, roc_auc)``,
        all computed from the predicted class labels of the held-out part.
    """
    # Identity check on purpose: ``1 == True`` holds in Python, so an
    # equality test would load the whole file when one row is requested.
    if rows is True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows=rows)

    response = data.is_blocked
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description, str)
    del data  # release the data frame before building the matrices

    vect = text.CountVectorizer(decode_error=u'ignore',
                                strip_accents='unicode',
                                ngram_range=(1, 2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies, counts))

    features_train, features_test, target_train, target_test = train_test_split(
        features, response, test_size=test)

    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)

    # NOTE(review): average precision and ROC AUC are computed from hard
    # labels here; ranking metrics are usually fed decision_function scores.
    return (classification_report(target_test, prediction),
            average_precision_score(target_test, prediction),
            roc_auc_score(target_test, prediction))
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:28,代码来源:ridge_wordbag.py

示例5: Eval

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def Eval(XTrain, YTrain, XTest, YTest, clf, return_predicted_labels=False):
    """Fit a classifier on the training data and score it on the test data.

    Inputs:
        XTrain - training data vectors (described by the author as N by D)
        YTrain - training class labels
        XTest - testing data vectors (described by the author as M by D)
        YTest - testing class labels
        clf - the classifier: either an estimator instance providing
            ``fit`` and ``predict``, or a string naming one.  Names are
            matched case-insensitively by substring, in this order:
            "ridge", "perceptron", "passive aggressive"/"passive-aggressive",
            "linsvm"/"linearsvm"/"linearsvc", "svm"/"svc", "sgd".
        return_predicted_labels - when true, the predicted labels are
            appended to the returned tuple.

    Outputs:
        A tuple containing (in the following order):
            Accuracy
            Overall Precision (micro-averaged)
            Overall Recall (micro-averaged)
            Overall F1 score (micro-averaged)
            Avg. Precision per class
            Avg. Recall per class
            Avg. F1 score per class
            Precision per class
            Recall per class
            F1 Score per class
            (if return_predicted_labels)
                predicted class labels for each row in XTest

    Raises:
        ValueError: if ``clf`` is a string that matches no known classifier.
    """
    if isinstance(clf, str):
        name = clf.lower()
        if 'ridge' in name:
            clf = RidgeClassifier(tol=1e-2, solver="lsqr")
        elif "perceptron" in name:
            clf = Perceptron(n_iter=50)
        elif "passive aggressive" in name or 'passive-aggressive' in name:
            clf = PassiveAggressiveClassifier(n_iter=50)
        # The linear variants must be tested before the generic 'svm'/'svc'
        # substrings, which they also contain.
        elif 'linsvm' in name or 'linearsvm' in name or 'linearsvc' in name:
            clf = LinearSVC()
        elif 'svm' in name or 'svc' in name:
            clf = SVC()
        elif 'sgd' in name:
            clf = SGDClassifier()
        else:
            # Previously the string fell through and failed later on
            # ``clf.fit`` with an unhelpful AttributeError.
            raise ValueError("Unknown classifier name: %r" % (clf,))

    clf.fit(XTrain, YTrain)
    YPred = clf.predict(XTest)

    accuracy = sklearn.metrics.accuracy_score(YTest, YPred)
    overall_precision, overall_recall, overall_f1, _ = (
        sklearn.metrics.precision_recall_fscore_support(
            YTest, YPred, average='micro'))
    precision_per_class, recall_per_class, f1_per_class, _ = (
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred))

    results = (
        accuracy,
        overall_precision,
        overall_recall,
        overall_f1,
        np.mean(precision_per_class),
        np.mean(recall_per_class),
        np.mean(f1_per_class),
        precision_per_class,
        recall_per_class,
        f1_per_class,
    )
    if return_predicted_labels:
        return results + (YPred,)
    return results
开发者ID:nmonath,项目名称:NLPProject,代码行数:60,代码来源:SupervisedLearning.py

示例6: get_optimal_blend_weigth

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    """Fit a ridge classifier on the experiment's test data and save it.

    Two files are written below ``Config.get_string('data.path')/folder``:
    a one-row CSV named ``fname`` holding the intercept followed by the
    coefficients, and a gzip-compressed pickle of the fitted classifier
    named ``model_fname``.

    Args:
        exp_: experiment object providing ``get_test_data()``.
        best_param_: keyword parameters applied to the classifier through
            ``set_params``.
        folder: sub-folder of the configured data path.
        fname: file name of the CSV with the linear weights.
        model_fname: file name of the pickled model.

    Returns:
        Always ``True``.
    """
    X_test, y_test = exp_.get_test_data()

    ridge = RidgeClassifier()
    ridge.set_params(**best_param_)
    ridge.fit(X_test, y_test)

    # One CSV row: 'intercept' first, then one column per feature.
    header = np.append(np.array(['intercept'], dtype='S100'),
                       X_test.columns.values)
    weights = np.append(ridge.intercept_, ridge.coef_).astype(np.float64)
    weight_table = pd.DataFrame(weights[np.newaxis, :], columns=header)
    csv_path = os.path.join(Config.get_string('data.path'), folder, fname)
    weight_table.to_csv(csv_path, index=False)

    # Persist the fitted model itself as a gzip-compressed pickle.
    pickle_path = os.path.join(Config.get_string('data.path'),
                               folder, model_fname)
    with gzip.open(pickle_path, 'wb') as out:
        cPickle.dump(ridge, out, cPickle.HIGHEST_PROTOCOL)

    return True
开发者ID:Quasi-quant2010,项目名称:Stacking,代码行数:23,代码来源:run_ridge_grid_search.py

示例7: Predict

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def Predict():
    """Fit four classifiers on the training corpus and report each one's rate.

    Reads the module-level ``Corpus_train``, ``Y_train``, ``Corpus_test``,
    ``Y_test`` and ``n_test``.  Each classifier is fitted, used to predict
    the test corpus, and the predictions are passed to ``print_rate``
    together with a display name.
    """
    # The format operator must be applied to the string, not to the result
    # of print(): under Python 3 print() returns None.
    print('\nThere are %d new deals' % n_test)

    classifiers = [
        # Author's note: KNN does not work well even with k tuned
        # (n_neighbors=7 and 11 were also tried).
        ('KNNClassifier', KNeighborsClassifier(n_neighbors=3)),
        ('SVMClassifier', svm.SVC()),
        # Author's note: tol=0.1 was also tried.
        ('RidgeClassifier', RidgeClassifier(tol=0.01, solver="lsqr")),
        # Random forests and decision trees were left out by the author
        # because they do badly on high-dimensional sparse data.
        # Author's notes on MultinomialNB: expected to do best because it is
        # designed for occurrence-count features; alpha=0.01 is worse than
        # 0.1, a large smoothing rate does not help, and 0.2 or 0.05 may
        # give the best outcome.
        ('MultinomialNBClassifier', MultinomialNB(alpha=0.1)),
    ]

    for label, clf in classifiers:
        clf.fit(Corpus_train, Y_train)
        predicted = clf.predict(Corpus_test)
        print_rate(Y_test, predicted, n_test, label)
开发者ID:albingrace,项目名称:QianWan,代码行数:38,代码来源:test_3.py

示例8: test_default_configuration_classify

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
    def test_default_configuration_classify(self):
        """Check ridge accuracy on digits after default ExtraTrees preprocessing.

        The check is repeated twice; each round loads the dense 'digits'
        dataset, fits the preprocessor with its default hyperparameters,
        and asserts the accuracy of a ridge classifier trained on the
        transformed data to two decimal places.
        """
        for _ in range(2):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=False)

            search_space = ExtraTreesPreprocessor.get_hyperparameter_search_space()
            defaults = search_space.get_default_configuration()
            kwargs = {name: defaults[name] for name in defaults}

            transformer = ExtraTreesPreprocessor(random_state=1, **kwargs)
            transformer.fit(X_train, Y_train)
            train_features = transformer.transform(X_train)
            test_features = transformer.transform(X_test)

            # Train a ridge classifier on the transformed features.
            fitted = RidgeClassifier().fit(train_features, Y_train)
            predicted = fitted.predict(test_features)
            score = sklearn.metrics.accuracy_score(predicted, Y_test)
            self.assertAlmostEqual(score, 0.87310261080752882, places=2)
开发者ID:dongzhixiang,项目名称:paramsklearn,代码行数:21,代码来源:test_extra_trees.py

示例9: test_default_configuration_classify

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
    def test_default_configuration_classify(self):
        """Check ridge accuracy on digits after default KernelPCA preprocessing.

        The check is repeated five times; each round loads the dense
        'digits' dataset, fits KernelPCA with every default hyperparameter
        that is not None, and asserts the accuracy of a ridge classifier
        trained on the transformed data.
        """
        for _ in range(5):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=False)

            defaults = (KernelPCA.get_hyperparameter_search_space()
                        .get_default_configuration())
            # Hyperparameters whose default is None are not passed at all.
            kwargs = {}
            for name in defaults:
                if defaults[name] is not None:
                    kwargs[name] = defaults[name]

            transformer = KernelPCA(random_state=1, **kwargs)
            transformer.fit(X_train, Y_train)
            train_features = transformer.transform(X_train)
            test_features = transformer.transform(X_test)

            # Train a ridge classifier on the transformed features.
            fitted = RidgeClassifier().fit(train_features, Y_train)
            score = sklearn.metrics.accuracy_score(
                fitted.predict(test_features), Y_test)
            self.assertAlmostEqual(score, 0.096539162112932606)
开发者ID:Allen1203,项目名称:auto-sklearn,代码行数:21,代码来源:test_kernel_pca.py

示例10: test_default_configuration_classify

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
    def test_default_configuration_classify(self):
        """Check ridge accuracy on sparse digits after default TruncatedSVD.

        The check is repeated twice; each round loads the 'digits' dataset
        with ``make_sparse=True``, fits TruncatedSVD with every default
        hyperparameter that is not None, and asserts the accuracy of a ridge
        classifier trained on the transformed data to two decimal places.
        """
        for _ in range(2):
            X_train, Y_train, X_test, Y_test = get_dataset(
                dataset='digits', make_sparse=True)

            search_space = TruncatedSVD.get_hyperparameter_search_space()
            defaults = search_space.get_default_configuration()
            # Hyperparameters whose default is None are not passed at all.
            kwargs = {name: defaults[name]
                      for name in defaults
                      if defaults[name] is not None}

            reducer = TruncatedSVD(random_state=1, **kwargs)
            reducer.fit(X_train, Y_train)
            train_features = reducer.transform(X_train)
            test_features = reducer.transform(X_test)

            # Train a ridge classifier on the reduced features.
            ridge = RidgeClassifier()
            fitted = ridge.fit(train_features, Y_train)
            predicted = fitted.predict(test_features)
            score = sklearn.metrics.accuracy_score(predicted, Y_test)
            self.assertAlmostEqual(score, 0.44201578627808136, places=2)
开发者ID:dongzhixiang,项目名称:paramsklearn,代码行数:23,代码来源:test_truncatedSVD.py

示例11: len

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
    # NOTE(review): this is the body of a function whose header is not part
    # of this excerpt; `train_file` is defined outside the visible code.
    # The snippet is Python 2 code (`file()`, print statements, `xrange`).

    # Read every CSV row into memory, drop the header row, then shuffle.
    data = [ i for i in csv.reader(file(train_file, 'rb')) ]
    data = data[1:] # remove header
    random.shuffle(data)

    # Column 0 of each row is the integer label; the remaining columns are
    # the features, converted to float.
    X = np.array([ i[1:] for i in data ]).astype(float)
    Y = np.array([ i[0] for i in data ]).astype(int)

    # First three quarters for training, the rest for testing.
    # NOTE(review): this is integer arithmetic under Python 2; under
    # Python 3 it would yield a float, which cannot be used as a slice index.
    train_cutoff = len(data) * 3/4

    X_train = X[:train_cutoff]
    Y_train = Y[:train_cutoff]
    X_test = X[train_cutoff:]
    Y_test = Y[train_cutoff:]

    classifier = RidgeClassifier(normalize = True, alpha = 1)
    classifier = classifier.fit(X_train, Y_train)
    
    # NOTE(review): the classifier is fitted a second time on the same data
    # here; the label says "error" but the value printed is the result of
    # `score` on the training set.
    print 'Training error : %s' % (classifier.fit(X_train, Y_train).score(X_train, Y_train))

    Y_predict = classifier.predict(X_test)

    # Count the test predictions that match the true label.
    equal = 0
    for i in xrange(len(Y_predict)):
        if Y_predict[i] == Y_test[i]:
            equal += 1

    # Fraction of matching predictions on the held-out part.
    print 'Accuracy = %s' % (float(equal)/len(Y_predict))


开发者ID:log0,项目名称:digit_recognizer,代码行数:29,代码来源:ridge.py

示例12: print

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
remove = ()

# Raw training text; cityName is defined before this excerpt.
X_train = cityName

print('Creating the vectorizer and chosing a transform (from raw text to feature)')
# TF-IDF with sub-linear term frequency; terms occurring in more than half
# of the documents are ignored (max_df=0.5).
vect = TfidfVectorizer(sublinear_tf=True, max_df=0.5)

X_train = vect.fit_transform(X_train)

# One ridge classifier per target, both trained on the same features.
cityClass = RidgeClassifier(tol=1e-7)
countryClass = RidgeClassifier(tol=1e-7)

print('Creating a classifier for cities')
cityClass.fit(X_train, cityCode)
print('Creating a classifier for countries')
countryClass.fit(X_train, countryCode)

print('testing the performance')

# Vectorize the test names with the vocabulary learned from training.
testCityNames = vect.transform(cityNameTest)

# Each prediction now comes from the classifier trained on the matching
# target; the two classifiers used to be swapped here.
predictionsCity = cityClass.predict(testCityNames)
predictionsCountry = countryClass.predict(testCityNames)

with open('predictions.csv','w') as csvfile:
        writer = csv.writer(csvfile)
        #for ind in range(0,len(predictionsCountry)):
        #        writer.writerow([str(predictionsCountry[ind]),str(predictionsCity[ind])])
        for predCountry,predCity in zip(predictionsCountry,predictionsCity):
开发者ID:ilciavo,项目名称:ml3,代码行数:33,代码来源:simpleTestingv04.py

示例13: time

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
# Combine the per-field test inputs (summary, title, author) with `+`.
# presumably these are feature matrices of identical shape -- TODO confirm
X_test = X_test_summary + X_test_title + X_test_author

# Time elapsed since t0, which is set before this excerpt.
duration = time() - t0

print("n_samples: %d, n_features: %d" % X_test.shape)
print("Done in %fs" % duration)

def writeToDisk(predn,clfname):
    """Write predictions to ``./<clfname>.txt`` as tab-separated lines.

    The file starts with a ``record_id<TAB>topic`` header, followed by one
    line per pair taken from the module-level ``testID`` and ``predn``.

    Args:
        predn: predicted topics, paired positionally with ``testID``.
        clfname: classifier name; used as the file stem and in the
            confirmation message printed at the end.
    """
    path = "./" + clfname + ".txt"
    # The context manager closes the file even if a write fails.
    with open(path, 'w') as out:
        out.write("{}\t{}\n".format("record_id", "topic"))
        for record_id, topic in zip(testID, predn):
            out.write("{}\t{}\n".format(record_id, topic))
    print(clfname," output written to disk.")

# The four classifiers compared in this script.
clf1 = RidgeClassifier(tol=1e-2, solver="lsqr")    # Ridge classifier
clf2 = MultinomialNB(alpha=.01)                    # Multinomial naive Bayes
clf3 = BernoulliNB(alpha=.01)                      # Bernoulli naive Bayes
clf4 = KNeighborsClassifier(n_neighbors=10)        # K nearest neighbours

# Fit, predict and dump the first three, one output file per classifier.
for model, label in ((clf1, "RidgeClassifier"),
                     (clf2, "MultinomialNB"),
                     (clf3, "BernoulliNB")):
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    writeToDisk(pred, label)

# The excerpt ends right after the KNN classifier has been fitted.
clf4.fit(X_train, y_train)
开发者ID:spanklekar,项目名称:MicrosoftBingOnline,代码行数:33,代码来源:document_classification_topic_score_v5.py

示例14: get_ridge_plot

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
def _empirical_cdf(scores, num_bins=100):
    """Return (upper bin edges, cumulative histogram scaled to end at 1)."""
    try:
        counts, bin_edges = np.histogram(scores, bins=num_bins, density=True)
    except ValueError:
        # Same fallback as before: retry with numpy's default bin count.
        counts, bin_edges = np.histogram(scores, density=True)
    cdf = np.cumsum(counts)
    return bin_edges[1:], cdf / cdf.max()


def get_ridge_plot(best_param_, experiment_, 
                   param_keys_, param_vals_,
                   png_folder,
                   png_fname,
                   score_threshold=0.8):
    """Plot the ridge regularization path and decision-score distributions.

    The figure has three panels: the coefficient of every feature (and the
    intercept slot) as a function of alpha, a histogram of the decision
    function on the test data, and the matching cumulative distribution.
    It is saved to ``Config.get_string('data.path')/png_folder/png_fname``.

    Args:
        best_param_: classifier parameters; must contain 'alpha' and
            'fit_intercept'.  The dict is not modified.
        experiment_: object providing ``get_train_data()``,
            ``get_test_data()``, ``get_proba(clf, X)`` and
            ``get_data_col_name()``.
        param_keys_: names of the searched parameters.
        param_vals_: values of the searched parameters, aligned with
            ``param_keys_``; the 'alpha' entry is the grid that is swept.
        png_folder: sub-folder of the configured data path.
        png_fname: file name of the saved figure.
        score_threshold: accepted for interface compatibility; not used.

    Returns:
        Always ``True``.

    Raises:
        KeyError: if 'alpha' or 'fit_intercept' is missing from
            ``best_param_``, or 'alpha' is missing from the parameter grid.
    """
    parameters = dict(zip(param_keys_, param_vals_))
    parameters.pop('model_type', None)
    alphas = parameters['alpha']

    # Work on a copy: the sweep below overwrites 'alpha', and writing into
    # the caller's dict would leave it holding the last grid value.
    params = dict(best_param_)
    best_alpha = params['alpha']
    fit_intercept = params['fit_intercept']

    clf = RidgeClassifier()
    X_train, y_train = experiment_.get_train_data()
    clf.set_params(**params)
    clf.fit(X_train, y_train)

    n_features = len(X_train.columns.values)
    result = {'alphas': [],
              'coefs': np.zeros((len(alphas), n_features + 1)),
              'scores': [],
              'score': None}

    # 1. regularization path: refit once per alpha and record the weights
    for i, alpha in enumerate(alphas):
        result['alphas'].append(alpha)
        params['alpha'] = alpha
        clf.set_params(**params)
        clf.fit(X_train, y_train)

        if fit_intercept:
            row = np.append(clf.intercept_, clf.coef_)
        else:
            # No intercept was fitted: slot 0 stays zero and the feature
            # coefficients fill the rest.  The intercept used to be
            # written into the coefficient slots here.
            row = np.zeros(n_features + 1)
            row[1:] = np.ravel(clf.coef_)
        result['coefs'][i, :] = row
        result['scores'].append(experiment_.get_proba(clf, X_train))
    del X_train, y_train

    # 2. plot layout: path on the left, PDF and CDF stacked on the right
    gs = GridSpec(2, 2)
    ax1 = plt.subplot(gs[:, 0])
    ax2 = plt.subplot(gs[0, 1])
    ax3 = plt.subplot(gs[1, 1])

    # 2.1 feature importance
    labels = np.append(np.array(['intercept'], dtype='S100'),
                       experiment_.get_data_col_name())
    alpha_axis = np.array(result['alphas'])
    for ncol in range(result['coefs'].shape[1]):
        ax1.plot(alpha_axis, result['coefs'][:, ncol], label=labels[ncol])
    ax1.legend(loc='best')
    ax1.set_xscale('log')
    ax1.set_title("Regularization Path:%1.3e" % (best_alpha))
    ax1.set_xlabel("alpha", fontsize=10)

    # 2.2 PDF
    # NOTE(review): clf is the model from the last sweep iteration, not the
    # best_alpha model named in the left panel's title -- confirm intent.
    X_test, y_test = experiment_.get_test_data()
    result['score'] = clf.decision_function(X_test)
    sns.distplot(result['score'], kde=False, rug=False, ax=ax2)
    ax2.set_title("PDF : Decision_Function")

    # 2.3 CDF
    cdf_x, cdf_y = _empirical_cdf(result['score'], num_bins=100)
    ax3.plot(cdf_x, cdf_y)
    ax3.set_title("CDF")
    ax3.set_xlabel("Decision_Function:Confidence_Score", fontsize=10)

    png_fname = os.path.join(Config.get_string('data.path'), png_folder, png_fname)
    plt.tight_layout()
    plt.savefig(png_fname)
    plt.close()

    return True
开发者ID:Quasi-quant2010,项目名称:Stacking,代码行数:89,代码来源:run_ridge_grid_search.py

示例15: KNeighborsClassifier

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 用法: 先实例化再调用, 例如 clf = RidgeClassifier(); clf.fit(X, y) (fit 是实例方法, 不能单独导入)
# K nearest neighbours: clf_KNN is created before this excerpt
# (n_neighbors=7 and n_neighbors=11 were also tried by the author).
clf_KNN.fit(Corpus_train, Y_train)
Y_pred_KNN = clf_KNN.predict(Corpus_test)
print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

# Support vector machine with default settings.
clf_SVM = svm.SVC().fit(Corpus_train, Y_train)
Y_pred_SVM = clf_SVM.predict(Corpus_test)
print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

# Ridge classifier (tol=0.1 was also tried by the author).
clf_RC = RidgeClassifier(tol=0.01, solver="lsqr").fit(Corpus_train, Y_train)
Y_pred_RC = clf_RC.predict(Corpus_test)
print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

# Random forests and decision trees are left out on purpose: according to
# the author they perform badly on high-dimensional sparse features.

# Multinomial naive Bayes, expected by the author to do best because it is
# designed for occurrence-count features.  Author's tuning notes:
# alpha=0.01 is worse than 0.1, a large smoothing rate (0.3) does not help,
# and alpha=0.2 or 0.05 may give the best outcome.
clf_MNB = MultinomialNB(alpha=0.1).fit(Corpus_train, Y_train)
Y_pred_MNB = clf_MNB.predict(Corpus_test)
print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
开发者ID:albingrace,项目名称:QianWan,代码行数:33,代码来源:task3_final.py


注:本文中的sklearn.linear_model.RidgeClassifier.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。