当前位置: 首页>>代码示例>>Python>>正文


Python RidgeClassifier.predict方法代码示例

本文整理汇总了Python中sklearn.linear_model.RidgeClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题:Python RidgeClassifier.predict方法的具体用法?Python RidgeClassifier.predict怎么用?Python RidgeClassifier.predict使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.linear_model.RidgeClassifier的用法示例。


在下文中一共展示了RidgeClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_and_predict_m8

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
def train_and_predict_m8(train, test, labels):
    """Vectorise stemmed text with TF-IDF and predict test labels with a ridge classifier.

    The train/test inputs are first passed through ``stemmer_clean`` (porter
    stemmer), then turned into TF-IDF features by a vectoriser fitted on the
    training documents only.  Depending on the module-level ``gridSearch``
    flag, the classifier is either tuned via ``perform_grid_search`` or
    fitted directly with its default settings from this function.

    Returns the predictions for the test documents.
    """
    # Basic concatenation + stemming.
    train_docs, test_docs = stemmer_clean(train, test, stemmerEnableM7,
                                          stemmer_type='porter')

    # TF-IDF with sub-linear TF and stop-word removal; vocabulary comes from
    # the training documents only.
    vectorizer = TfidfVectorizer(
        min_df=5,
        max_features=None,
        strip_accents='unicode',
        analyzer='word',
        token_pattern=r'\w{1,}',
        ngram_range=(1, 5),
        smooth_idf=1,
        sublinear_tf=1,
        stop_words=ML_STOP_WORDS,
    )
    vectorizer.fit(train_docs)
    train_matrix = vectorizer.transform(train_docs)
    test_matrix = vectorizer.transform(test_docs)

    print("Fitting Ridge Classifer...")
    ridge = RidgeClassifier(class_weight='auto', alpha=1, normalize=True)

    if gridSearch:
        # Candidate settings handed to the grid search helper.
        search_space = {
            'alpha': [0.1, 0.3, 1, 3, 10],
            'normalize': [True, False],
        }
        fitted = perform_grid_search(ridge, search_space, train_matrix, labels)
    else:
        ridge.fit(train_matrix, labels)
        fitted = ridge

    return fitted.predict(test_matrix)
开发者ID:sathishrvijay,项目名称:Kaggle-CrowdFlowerSRR,代码行数:27,代码来源:classifier.py

示例2: validate

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
def validate(input_train, rows=True, test=0.25):
    """
    Takes file as input and returns classification report, average precision, and
    AUC for a bigram model. By default, loads all rows of a dataset, trains on .75,
    and tests on .25.
    ----
    input_train : 'full path of the file you are loading'
    rows : True - loads all rows; insert an int for specific number of rows
    test : float proportion of dataset used for testing

    Returns a 3-tuple: (classification report, average precision, ROC AUC).
    The report is computed from the predicted labels; the two ranking metrics
    are computed from the classifier's continuous decision values.
    """
    # Identity check on purpose: the integer 1 compares equal to True, so
    # `rows == True` would load the whole file when a single row was requested.
    if rows is True:
        data = pd.read_table(input_train)
    else:
        data = pd.read_table(input_train, nrows=rows)

    response = data.is_blocked
    # One-hot encoded subcategory, kept sparse so it can be stacked with the counts.
    dummies = sparse.csc_matrix(pd.get_dummies(data.subcategory))
    words = np.array(data.description, str)
    del data  # the raw frame is no longer needed once the columns are extracted

    # Unigram + bigram counts of the description text.
    vect = text.CountVectorizer(decode_error=u'ignore', strip_accents='unicode',
                                ngram_range=(1, 2))
    counts = vect.fit_transform(words)
    features = sparse.hstack((dummies, counts))

    features_train, features_test, target_train, target_test = train_test_split(
        features, response, test_size=test)

    clf = RidgeClassifier()
    clf.fit(features_train, target_train)
    prediction = clf.predict(features_test)
    # Ranking metrics need non-thresholded scores; feeding them hard 0/1 labels
    # collapses the curve to a single operating point.
    scores = clf.decision_function(features_test)

    return (classification_report(target_test, prediction),
            average_precision_score(target_test, scores),
            roc_auc_score(target_test, scores))
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:28,代码来源:ridge_wordbag.py

示例3: Eval

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
def Eval(XTrain, YTrain, XTest, YTest, clf, return_predicted_labels=False):
    """
    Fit a classifier on the training data and score it on the test data.

    Inputs:
        XTrain - N by D matrix of training data vectors
        YTrain - N by 1 matrix of training class labels
        XTest - M by D matrix of testing data vectors
        YTest - M by 1 matrix of testing class labels
        clf - the classifier
            either a string containing one of "ridge", "perceptron",
            "passive aggressive" / "passive-aggressive",
            "linsvm" / "linearsvm" / "linearsvc", "svm" / "svc", "sgd"
            (case-insensitive)
            or a classifier instance with the methods .fit and .predict
        return_predicted_labels - when True, append the predicted labels
            to the returned tuple
    Outputs:
        A tuple containing (in the following order):
            Accuracy
            Overall Precision
            Overall Recall
            Overall F1 score
            Avg. Precision per class
            Avg. Recall per class
            Avg. F1 Score per class
            Precision per class
            Recall per class
            F1 Score per class
            (if return_predicted_labels)
                predicted class labels for each row in XTest
    Raises:
        ValueError - if clf is a string that matches none of the known names
    """
    if isinstance(clf, str):
        name = clf.lower()
        # Order matters: the linear-SVM spellings must be tested before the
        # generic 'svm' / 'svc' substrings, which they also contain.
        if 'ridge' in name:
            clf = RidgeClassifier(tol=1e-2, solver="lsqr")
        elif 'perceptron' in name:
            clf = Perceptron(n_iter=50)
        elif 'passive aggressive' in name or 'passive-aggressive' in name:
            clf = PassiveAggressiveClassifier(n_iter=50)
        elif 'linsvm' in name or 'linearsvm' in name or 'linearsvc' in name:
            clf = LinearSVC()
        elif 'svm' in name or 'svc' in name:
            clf = SVC()
        elif 'sgd' in name:
            clf = SGDClassifier()
        else:
            # Previously an unknown name fell through and failed later with an
            # opaque "'str' object has no attribute 'fit'".
            raise ValueError("Unknown classifier name: %r" % (clf,))

    clf.fit(XTrain, YTrain)
    YPred = clf.predict(XTest)

    accuracy = sklearn.metrics.accuracy_score(YTest, YPred)
    # Micro-averaged scores over all samples; the support slot is unused.
    (overall_precision, overall_recall, overall_f1, _) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred, average='micro')
    # Without `average`, the same call returns one value per class.
    (precision_per_class, recall_per_class, f1_per_class, _) = \
        sklearn.metrics.precision_recall_fscore_support(YTest, YPred)
    avg_precision_per_class = np.mean(precision_per_class)
    avg_recall_per_class = np.mean(recall_per_class)
    avg_f1_per_class = np.mean(f1_per_class)

    results = (accuracy, overall_precision, overall_recall, overall_f1,
               avg_precision_per_class, avg_recall_per_class, avg_f1_per_class,
               precision_per_class, recall_per_class, f1_per_class)
    if return_predicted_labels:
        return results + (YPred,)
    return results
开发者ID:nmonath,项目名称:NLPProject,代码行数:60,代码来源:SupervisedLearning.py

示例4: Predict

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
def Predict():
    """Fit several classifiers on the training corpus and report each one on the test set.

    Reads the module-level ``Corpus_train``, ``Y_train``, ``Corpus_test``,
    ``Y_test`` and ``n_test``; each classifier's predictions are handed to
    ``print_rate`` together with a display label.  Returns nothing.
    """
    # The format operator has to be applied to the string inside the call;
    # applying it to the result of print() raises TypeError on Python 3.
    print('\nThere are %d new deals' % n_test)

    candidates = [
        # KNN does not work well even when k is tuned (7 and 11 were also tried).
        (KNeighborsClassifier(n_neighbors=3), 'KNNClassifier'),
        (svm.SVC(), 'SVMClassifier'),
        # tol=0.1 was also tried for the ridge classifier.
        (RidgeClassifier(tol=0.01, solver="lsqr"), 'RidgeClassifier'),
        # Random Forests / Decision Trees are not considered because they work
        # badly for high sparse dimensions.
        # Multinomial Naive Bayes is expected to do best since it is designed
        # for occurrence-count features.  Smoothing notes from tuning:
        # alpha=0.01 is worse than 0.1, a big rate (0.3) does not help,
        # and 0.2 or 0.05 can generate the best outcome.
        (MultinomialNB(alpha=0.1), 'MultinomialNBClassifier'),
    ]

    for clf, label in candidates:
        clf.fit(Corpus_train, Y_train)
        print_rate(Y_test, clf.predict(Corpus_test), n_test, label)
开发者ID:albingrace,项目名称:QianWan,代码行数:38,代码来源:test_3.py

示例5:

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
    # pre_all = 0.0
    # rec_all = 0.0
    # Per-fold score accumulators for the evaluation loop below.
    # NOTE(review): f1_all and f5_all are created here but nothing is appended
    # to them within this excerpt — presumably used further down; confirm.
    f1_all = []
    f5_all = []
    acc_all = []
    pre_all = []
    rec_all = []

    # level 1 evaluation
    # kf1 is defined outside this excerpt; it yields (train_index, test_index)
    # pairs that are used to index z and y — presumably a k-fold splitter.
    for train_index, test_index in kf1:

        z_train, z_test = z[train_index], z[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Refit the classifier on this fold and predict its held-out rows.
        clf.fit(z_train, y_train)
        pred = clf.predict(z_test)

        # metrics
        # NOTE(review): metrics.zero_one_score looks like an old scikit-learn
        # name — presumably accuracy_score in current releases; confirm
        # against the pinned version before running.
        acc_score = metrics.zero_one_score(y_test, pred)
        pre_score = metrics.precision_score(y_test, pred)
        rec_score = metrics.recall_score(y_test, pred)

        acc_all.append(acc_score)
        pre_all.append(pre_score)
        rec_all.append(rec_score)

    # put the lists into numpy array for calculating the results
    acc_all_array  = np.asarray(acc_all)
    pre_all_array  = np.asarray(pre_all)
    rec_all_array  = np.asarray(rec_all)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:32,代码来源:04_01_Ensemble_Stacking_Prob_1010.py

示例6: BernoulliNB

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
# print

# Convert the feature matrices with .toarray() before fitting.
# presumably X_train / X_test are scipy sparse matrices at this point — TODO confirm
X_train = X_train.toarray()
X_test = X_test.toarray()

# Classifier under test; the commented-out lines are alternative classifiers
# that can be swapped in by moving the comment marker.
# clf = BernoulliNB(alpha=.1)
# clf = MultinomialNB(alpha=.01)
# clf = KNeighborsClassifier(n_neighbors=3)
clf = RidgeClassifier(tol=1e-1)
# clf = RandomForestClassifier(n_estimators=20, max_depth=None, min_split=3, random_state=42)
# clf = SGDClassifier(alpha=.01, n_iter=50, penalty="l2")
# clf = LinearSVC(loss='l2', penalty='l2', C=1000, dual=False, tol=1e-3)


# Fit on the training split and predict the test split.
clf.fit(X_train, y_train)
pred = clf.predict(X_test)

# Show the true labels next to the predictions (Python 2 print statements;
# the bare `print` emits an empty line).
print "y    : ", y_test
print "pred : ", pred
print

# # print out top words for each category
# for i, category in enumerate(categories):
#             top = np.argsort(clf.coef_[i, :])[-20:]
#             print "%s: %s" % (category, " ".join(vocabulary[top]))
#             print
# print
# print


# Precision of the predictions against the true test labels.
pre_score = metrics.precision_score(y_test, pred)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:33,代码来源:ml_test.py

示例7: the

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
    # Slice this fold's training and held-out rows out of the full data.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # NOTE(review): X_den_train / X_den_test are not used anywhere in this
    # excerpt — every model below is fitted on X_train; confirm intent.
    X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # feed models
    clf_mNB.fit(X_train, y_train)
    clf_kNN.fit(X_train, y_train)
    clf_ridge.fit(X_train, y_train)
    clf_lSVC.fit(X_train, y_train)
    clf_SVC.fit(X_train, y_train)

    # get prediction for this fold run
    pred_mNB    = clf_mNB.predict(X_test)
    pred_kNN    = clf_kNN.predict(X_test)
    pred_ridge  = clf_ridge.predict(X_test)
    pred_lSVC   = clf_lSVC.predict(X_test)
    pred_SVC    = clf_SVC.predict(X_test)

    # update z array for each model
    # np.append with axis=None flattens both operands, so each z_* stays a
    # one-dimensional array that grows by this fold's predictions.
    z_mNB   = np.append(z_mNB    , pred_mNB  , axis=None)
    z_kNN   = np.append(z_kNN    , pred_kNN  , axis=None)
    z_ridge = np.append(z_ridge  , pred_ridge, axis=None)
    z_lSVC  = np.append(z_lSVC   , pred_lSVC , axis=None)
    z_SVC   = np.append(z_SVC    , pred_SVC  , axis=None)


# putting z's from each model into one 2d matrix
# this is the (feature) input, similar as X, for level 1
# In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:33,代码来源:04_Ensemble_Stacking_Pred.py

示例8: classify

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
def classify(granularity=10):
    """
    Train a RidgeClassifier on clustered documents and predict a cluster per test document.

    Training documents are loaded from
    ``processed_data/<granularity>_clustered/`` and test documents from
    ``processed_data/test``, both under GEOTEXT_HOME.  The text is vectorised
    with TF-IDF, the classifier is fitted, timing and model diagnostics are
    printed, and the predictions are then walked one test document at a time.

    granularity : value used to build the training directory name.

    Relies on module-level names not defined here: GEOTEXT_HOME, encoding,
    userLocation, mean, density and the scikit-learn imports.
    """
    trainDir = path.join(GEOTEXT_HOME, 'processed_data/' + str(granularity).strip() + '_clustered/')
    testDir = path.join(GEOTEXT_HOME, 'processed_data/test')
    data_train = load_files(trainDir, encoding=encoding)
    target = data_train.target
    data_test = load_files(testDir, encoding=encoding)

    categories = data_train.target_names

    def size_mb(docs):
        """Return the total encoded size of docs in megabytes (1e6 bytes)."""
        return sum(len(s.encode(encoding)) for s in docs) / 1e6

    data_train_size_mb = size_mb(data_train.data)
    data_test_size_mb = size_mb(data_test.data)

    print("%d documents - %0.3fMB (training set)" % (
        len(data_train.data), data_train_size_mb))
    print("%d documents - %0.3fMB (test set)" % (
        len(data_test.data), data_test_size_mb))
    print("%d categories" % len(categories))
    print()

    # split a training set and a test set
    y_train = data_train.target
    y_test = data_test.target

    print("Extracting features from the training dataset using a sparse vectorizer")
    t0 = time()
    vectorizer = TfidfVectorizer(use_idf=True, norm='l2', binary=False, sublinear_tf=True, min_df=2, max_df=1.0, ngram_range=(1, 1), stop_words='english')
    X_train = vectorizer.fit_transform(data_train.data)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_train_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_train.shape)
    print()

    print("Extracting features from the test dataset using the same vectorizer")
    t0 = time()
    X_test = vectorizer.transform(data_test.data)
    duration = time() - t0
    print("done in %fs at %0.3fMB/s" % (duration, data_test_size_mb / duration))
    print("n_samples: %d, n_features: %d" % X_test.shape)
    print()

    # Optional chi-squared feature selection; switched off by the flag below.
    chi = False
    if chi:
        k = 500000
        # Report the real number of features kept (this used to format a
        # literal 0 instead of k).
        print("Extracting %d best features by a chi-squared test" % k)
        t0 = time()
        ch2 = SelectKBest(chi2, k=k)
        X_train = ch2.fit_transform(X_train, y_train)
        X_test = ch2.transform(X_test)

        print("done in %fs" % (time() - t0))
        print()

    feature_names = np.asarray(vectorizer.get_feature_names())
    # clf = LinearSVC(loss='l2', penalty='l2', dual=True, tol=1e-3)
    clf = RidgeClassifier(tol=1e-2, solver="auto")
    print('_' * 80)
    print("Training: ")
    print(clf)

    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0
    print("train time: %0.3fs" % train_time)

    t0 = time()
    pred = clf.predict(X_test)
    scores = clf.decision_function(X_test)
    # Call form of print so these lines match the rest of the function and
    # parse on both Python 2 and Python 3.
    print(scores.shape)
    print(pred.shape)
    test_time = time() - t0
    print("test time:  %0.3fs" % test_time)

    # score = metrics.f1_score(y_test, pred)
    # print("f1-score:   %0.3f" % score)

    if hasattr(clf, 'coef_'):
        print("dimensionality: %d" % clf.coef_.shape[1])
        print("density: %f" % density(clf.coef_))
        print("top 10 keywords per class:")
        for i, category in enumerate(categories):
            # argsort is ascending, so the last ten indices carry the largest weights.
            top10 = np.argsort(clf.coef_[i])[-10:]
            print("%s: %s" % (category, " ".join(feature_names[top10])))

    sumMeanDistance = 0
    sumMedianDistance = 0
    distances = []
    confidences = []
    randomConfidences = []

    for i in range(0, len(pred)):
        # The test file's base name is the key into userLocation, whose value
        # is a "lat,lon" string.
        user = path.basename(data_test.filenames[i])
        location = userLocation[user].split(',')
        lat = float(location[0])
        lon = float(location[1])
        prediction = categories[pred[i]]
        # Margin of the predicted class's score over the mean score for this document.
        confidence = scores[i][pred[i]] - mean(scores[i])
#.........这里部分代码省略.........
开发者ID:afshinrahimi,项目名称:textylon,代码行数:103,代码来源:rollergeolocation.py

示例9: the

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
    # Split the training data once more: this fold's fitting rows and its
    # held-out rows both come from X_train / y_train.
    X_train_train, X_train_test = X_train[train_index], X_train[test_index]
    y_train_train, y_train_test = y_train[train_index], y_train[test_index]

    # X_den_train, X_den_test = X_den[train_index], X_den[test_index]

    # feed models
    clf_mNB.fit(X_train_train, y_train_train)
    clf_kNN.fit(X_train_train, y_train_train)
    clf_ridge.fit(X_train_train, y_train_train)
    clf_lSVC.fit(X_train_train, y_train_train)
    clf_SVC.fit(X_train_train, y_train_train)

    # get prediction for this fold run
    pred_mNB    = clf_mNB.predict(X_train_test)
    pred_kNN    = clf_kNN.predict(X_train_test)
    pred_ridge  = clf_ridge.predict(X_train_test)
    pred_lSVC   = clf_lSVC.predict(X_train_test)
    pred_SVC    = clf_SVC.predict(X_train_test)

    # update z array for each model
    # np.append with axis=None flattens both operands, so each z_* stays a
    # one-dimensional array that grows by this fold's predictions.
    z_mNB   = np.append(z_mNB    , pred_mNB  , axis=None)
    z_kNN   = np.append(z_kNN    , pred_kNN  , axis=None)
    z_ridge = np.append(z_ridge  , pred_ridge, axis=None)
    z_lSVC  = np.append(z_lSVC   , pred_lSVC , axis=None)
    z_SVC   = np.append(z_SVC    , pred_SVC  , axis=None)


# putting z's from each model into one 2d matrix
# this is the (feature) input, similar as X, for level 1
# In level 1, y is still y.
# z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:33,代码来源:05_Test_stacking_pred.py

示例10: KNeighborsClassifier

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
#clf_KNN = KNeighborsClassifier(n_neighbors=11)
# Every classifier below follows the same three steps: fit on the training
# corpus, predict the test corpus, then report through print_rate.
# clf_KNN itself is constructed before this excerpt begins.
clf_KNN.fit(Corpus_train, Y_train)
Y_pred_KNN = clf_KNN.predict(Corpus_test)
print_rate(Y_test, Y_pred_KNN, n_test, 'KNNClassifier')

# Using the SVM classifier
clf_SVM = svm.SVC()
clf_SVM.fit(Corpus_train, Y_train)
Y_pred_SVM = clf_SVM.predict(Corpus_test)
print_rate(Y_test, Y_pred_SVM, n_test, 'SVMClassifier')

# Using the Ridge classifier
clf_RC = RidgeClassifier(tol=0.01, solver="lsqr")
#clf_RC = RidgeClassifier(tol=0.1, solver="lsqr")
clf_RC.fit(Corpus_train, Y_train)
Y_pred_RC = clf_RC.predict(Corpus_test)
print_rate(Y_test, Y_pred_RC, n_test, 'RidgeClassifier')

# Random Forests and Decision Trees are not considered because they work badly for high sparse dimensions


# Using the Multinomial Naive Bayes classifier
# This MNB classifier is expected to do the best since it is designed for occurrence-count features
#clf_MNB = MultinomialNB(alpha=0.01) #smoothing parameter = 0.01 is worse than 0.1
clf_MNB = MultinomialNB(alpha=0.1)
#clf_MNB = MultinomialNB(alpha=0.3) #a big smoothing rate does not benefit the model
#clf_MNB = MultinomialNB(alpha=0.2) #or alpha = 0.05 can generate the best outcome
clf_MNB.fit(Corpus_train, Y_train)
Y_pred_MNB = clf_MNB.predict(Corpus_test)
print_rate(Y_test, Y_pred_MNB, n_test, 'MultinomialNBClassifier')
#score = metrics.f1_score(Y_test, Y_pred_MNB)
开发者ID:albingrace,项目名称:QianWan,代码行数:33,代码来源:task3_final.py

示例11: time

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
# Elapsed time since t0 (set before this excerpt), reported together with the
# shape of the test feature matrix.
duration = time() - t0
print("n_samples: %d, n_features: %d" % X_test.shape)
print("Done in %fs" % (duration))

def writeToDisk(predn,clfname):
    """Write predictions to ``./<clfname>.txt`` as tab-separated text.

    The file starts with a ``record_id<TAB>topic`` header line, followed by
    one line per (id, prediction) pair.  Ids are taken from the module-level
    ``testID`` and paired with ``predn`` positionally.

    predn   : iterable of predicted values, in the same order as ``testID``
    clfname : classifier name, used as the output file's base name
    """
    out_path = "./" + clfname + ".txt"
    # The context manager closes the file even if a write fails part-way.
    with open(out_path, 'w') as out_file:
        out_file.write("{}\t{}\n".format("record_id", "topic"))
        for record_id, topic in zip(testID, predn):
            out_file.write("{}\t{}\n".format(record_id, topic))
    print(clfname," output written to disk.")

# Each stanza fits one classifier on the training features, predicts the test
# features and writes the predictions to a file named after the classifier.
# `pred` is rebound by every stanza.
clf1=RidgeClassifier(tol=1e-2, solver="lsqr")   #Ridge Classifier
clf1.fit(X_train, y_train)
pred = clf1.predict(X_test)
writeToDisk(pred,"RidgeClassifier")

clf2=MultinomialNB(alpha=.01)                   #Naive Bayes classifier
clf2.fit(X_train, y_train)
pred = clf2.predict(X_test)
writeToDisk(pred,"MultinomialNB")

clf3=BernoulliNB(alpha=.01)                     #Naive Bayes(Bernoulli) classifier
clf3.fit(X_train, y_train)
pred = clf3.predict(X_test)
writeToDisk(pred,"BernoulliNB")

# NOTE(review): unlike the three stanzas above, no writeToDisk call follows
# this prediction within the excerpt — possibly cut off; confirm.
clf4=KNeighborsClassifier(n_neighbors=10)       #KNeighbors Classifier
clf4.fit(X_train, y_train)
pred = clf4.predict(X_test)
开发者ID:spanklekar,项目名称:MicrosoftBingOnline,代码行数:33,代码来源:document_classification_topic_score_v5.py

示例12: print

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
# Raw city-name strings are the only input feature for both classifiers.
X_train = cityName

print('Creating the vectorizer and chosing a transform (from raw text to feature)')
vect = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
#vect=CountVectorizer(min_n=1,max_n=2,max_features=1000);

X_train = vect.fit_transform(X_train)

# Two independent classifiers share the same features: one is trained on
# city codes, the other on country codes.
cityClass = RidgeClassifier(tol=1e-7)
countryClass = RidgeClassifier(tol=1e-7)

print('Creating a classifier for cities')
cityClass.fit(X_train, cityCode)
print('Creating a classifier for countries')
countryClass.fit(X_train, countryCode)

print('testing the performance')

# Reuse the fitted vectorizer so test features line up with the training vocabulary.
testCityNames = vect.transform(cityNameTest)

# Each prediction array is bound to the classifier its name says: these two
# assignments used to be crossed (city <- countryClass, country <- cityClass).
predictionsCity = cityClass.predict(testCityNames)
predictionsCountry = countryClass.predict(testCityNames)

with open('predictions.csv','w') as csvfile:
    writer = csv.writer(csvfile)
    # Column order is unchanged from the previous output: the city
    # classifier's prediction first, then the country classifier's.
    for predCity, predCountry in zip(predictionsCity, predictionsCountry):
        writer.writerow([predCity, predCountry])
开发者ID:ilciavo,项目名称:ml3,代码行数:32,代码来源:simpleTestingv04.py

示例13: the

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
        # Slice this fold's training and held-out rows; X_den is indexed with
        # the same row indices and is what clf_SVC is fitted and evaluated on.
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        X_den_train, X_den_test = X_den[train_index], X_den[test_index]

        # feed models
        clf_mNB.fit(X_train, y_train)
        # clf_kNN.fit(X_train, y_train)
        clf_ridge.fit(X_train, y_train)
        clf_lSVC.fit(X_train, y_train)
        clf_SVC.fit(X_den_train, y_train)

        # get prediction for this fold run
        pred_mNB    = clf_mNB.predict(X_test)
        # pred_kNN    = clf_kNN.predict(X_test)
        pred_ridge  = clf_ridge.predict(X_test)
        pred_lSVC   = clf_lSVC.predict(X_test)
        pred_SVC    = clf_SVC.predict(X_den_test)

        # update z array for each model
        # np.append with axis=None flattens both operands, so each z_* stays a
        # one-dimensional array that grows by this fold's predictions.
        z_mNB   = np.append(z_mNB    , pred_mNB  , axis=None)
        # z_kNN   = np.append(z_kNN    , pred_kNN  , axis=None)
        z_ridge = np.append(z_ridge  , pred_ridge, axis=None)
        z_lSVC  = np.append(z_lSVC   , pred_lSVC , axis=None)
        z_SVC   = np.append(z_SVC    , pred_SVC  , axis=None)


    # putting z's from each model into one 2d matrix
    # this is the (feature) input, similar as X, for level 1
    # In level 1, y is still y.
    # z = np.array([z_bNB, z_mNB, z_kNN, z_ridge, z_SGD, z_lSVC, z_SVC, z_tree, z_logis], dtype=np.int32)
开发者ID:YuanhaoSun,项目名称:PPLearn,代码行数:32,代码来源:04_01_Ensemble_Stacking_Pred_1010.py

示例14: train_test_split

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
#!/usr/bin/env python
"""
Ridge regression for Avito
"""
__author__ = "deniederhut"
__license__ = "GPL"
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score

# Replace with the file path to your training data; only the first 100000
# rows are read.
data = pd.read_table('/Users/dillonniederhut/Desktop/avito_train.tsv', nrows=100000)

# One-hot encoded subcategory is the only feature; is_blocked is the target.
features = pd.get_dummies(data.subcategory)
features_train, features_test, target_train, target_test = train_test_split(
    features, data.is_blocked, test_size=0.25)

ridge = RidgeClassifier()
ridge.fit(features_train, target_train)
prediction = np.round(ridge.predict(features_test))

# Call form of print: a single parenthesised argument prints identically on
# Python 2 and Python 3, whereas the statement form is a SyntaxError on 3.
print(classification_report(target_test, prediction))
print(average_precision_score(target_test, prediction))
print(roc_auc_score(target_test, prediction))
开发者ID:eyedvabny,项目名称:CDIPS-WS-2014,代码行数:29,代码来源:ridge_benchmark.py

示例15: len

# 需要导入模块: from sklearn.linear_model import RidgeClassifier [as 别名]
# 或者: from sklearn.linear_model.RidgeClassifier import predict [as 别名]
    # Read every CSV row into memory using the Python 2 `file` builtin.
    # NOTE(review): the file object is never explicitly closed.
    data = [ i for i in csv.reader(file(train_file, 'rb')) ]
    data = data[1:] # remove header
    random.shuffle(data)

    # Column 0 of each row is the integer label; the remaining columns are
    # converted to float features.
    X = np.array([ i[1:] for i in data ]).astype(float)
    Y = np.array([ i[0] for i in data ]).astype(int)

    # The first three quarters of the shuffled rows train, the rest test.
    # NOTE(review): `* 3/4` produces an int only under Python 2 division; it
    # is used as a slice bound below.
    train_cutoff = len(data) * 3/4

    X_train = X[:train_cutoff]
    Y_train = Y[:train_cutoff]
    X_test = X[train_cutoff:]
    Y_test = Y[train_cutoff:]

    classifier = RidgeClassifier(normalize = True, alpha = 1)
    classifier = classifier.fit(X_train, Y_train)
    
    # NOTE(review): this calls fit() a second time on the same data before
    # scoring, and the message says "error" while printing .score(...) —
    # presumably accuracy; confirm.
    print 'Training error : %s' % (classifier.fit(X_train, Y_train).score(X_train, Y_train))

    Y_predict = classifier.predict(X_test)

    # Count the test rows whose predicted label equals the true label.
    equal = 0
    for i in xrange(len(Y_predict)):
        if Y_predict[i] == Y_test[i]:
            equal += 1

    print 'Accuracy = %s' % (float(equal)/len(Y_predict))


开发者ID:log0,项目名称:digit_recognizer,代码行数:29,代码来源:ridge.py


注:本文中的sklearn.linear_model.RidgeClassifier.predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。