当前位置: 首页>>代码示例>>Python>>正文


Python SGDClassifier.predict方法代码示例

本文整理汇总了Python中sklearn.linear_model.SGDClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题:Python SGDClassifier.predict方法的具体用法?Python SGDClassifier.predict怎么用?Python SGDClassifier.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.linear_model.SGDClassifier的用法示例。


在下文中一共展示了SGDClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sgc_test

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def sgc_test(X, y, weight):
    """Train a sample-weighted SGD classifier and print train/test accuracy.

    The data is split 80/20 with a fixed ``random_state=0``, scaled with a
    ``StandardScaler`` that does not centre the data (``with_mean=False``)
    and is fitted on the training part only, then used to fit an
    ``SGDClassifier`` with hinge loss and an L2 penalty.

    Args:
        X: Feature matrix.
        y: Target labels, aligned with ``X``.
        weight: Per-sample weights, aligned with ``X``; they are split
            together with the data and used for both fitting and scoring.

    Returns:
        None. The weighted training accuracy and then the weighted test
        accuracy are printed.
    """
    from sklearn.linear_model import SGDClassifier
    from sklearn.preprocessing import StandardScaler
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split

    X_train, X_test, y_train, y_test, weight_train, weight_test = train_test_split(
        X, y, weight, test_size=0.2, random_state=0)

    # Fit the scaler on the training split only so that no information from
    # the test split leaks into the model.
    scaler = StandardScaler(with_mean=False)
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # NOTE(review): newer scikit-learn releases replaced ``n_iter`` with
    # ``max_iter``; rename it here when the installed version requires it.
    clf = SGDClassifier(loss="hinge", n_iter=100, n_jobs=-1, penalty="l2")
    clf.fit(X_train, y_train, sample_weight=weight_train)

    print(clf.score(X_train, y_train, weight_train))
    print(clf.score(X_test, y_test, weight_test))
开发者ID:organization-lab,项目名称:weibo-predict,代码行数:29,代码来源:regressor.py

示例2: test_multi_output_classification_partial_fit

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def test_multi_output_classification_partial_fit():
    """Check ``MultiOutputClassifier.partial_fit`` against per-column fits.

    The multi-output wrapper is trained incrementally on the two halves of
    the module-level ``X``/``y`` data. After each step its predictions must
    have shape ``(n_samples, n_outputs)`` and, column by column, equal the
    predictions of a stand-alone ``SGDClassifier`` trained on that column.
    """
    base_estimator = SGDClassifier(loss='log', random_state=1)
    multi_clf = MultiOutputClassifier(base_estimator)

    # Split the samples into a first and a second half.
    split = X.shape[0] // 2
    X_head, X_tail = X[:split], X[split:]
    y_head, y_tail = y[:split], y[split:]
    expected_shape = (n_samples, n_outputs)

    # First incremental step: the classes must be supplied up front.
    multi_clf.partial_fit(X_head, y_head, classes=classes)
    preds_after_first = multi_clf.predict(X)
    assert_equal(expected_shape, preds_after_first.shape)

    # Second incremental step on the remaining samples.
    multi_clf.partial_fit(X_tail, y_tail)
    preds_after_second = multi_clf.predict(X)
    assert_equal(expected_shape, preds_after_second.shape)

    # Repeat the same two steps with one plain classifier per output column
    # and compare against the matching column of the wrapper's predictions.
    for col in range(3):
        # Start each column from a clone of the previous estimator.
        base_estimator = clone(base_estimator)
        base_estimator.partial_fit(
            X_head, y_head[:, col], classes=classes[col])
        assert_array_equal(base_estimator.predict(X),
                           preds_after_first[:, col])
        base_estimator.partial_fit(X_tail, y_tail[:, col])
        assert_array_equal(base_estimator.predict(X),
                           preds_after_second[:, col])
开发者ID:MechCoder,项目名称:scikit-learn,代码行数:31,代码来源:test_multioutput.py

示例3: algo

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def algo(a):
    """Train the classifier selected by *a* and score it on the ``week`` data.

    The module-level ``data`` frame supplies the training features and the
    ``target`` column; the module-level ``week`` frame is the evaluation
    set, whose ``target`` column is recomputed row by row with ``convert``.
    Both globals are rebound to their feature-only views.

    NOTE(review): because ``data`` is rebound without its ``target`` column,
    a second call in the same process presumably fails at ``data['target']``
    -- confirm against the callers before relying on repeated calls.

    Args:
        a: Which model to run: ``'rf'`` (random forest), ``'sgd'`` (SGD
            classifier) or ``'nb'`` (Gaussian naive Bayes). Any other value
            prepares the data but trains nothing.

    Returns:
        None. Results are reported through ``cal_score``.
    """
    global data
    global week

    # Feature columns shared by the training and the evaluation frame.
    feature_columns = [
        "id", "cpu", "creator", "dbs", "dtype", "era", "nblk", "nevt",
        "nfiles", "nlumis", "nrel", "nsites", "nusers", "parent", "primds",
        "proc_evts", "procds", "rnaccess", "rnusers", "rtotcpu", "size",
        "tier", "totcpu", "wct", "naccess",
    ]

    target = data['target']
    data = data[feature_columns]

    week['target'] = 0
    week['target'] = week.apply(convert, axis=1)
    week['target'] = week['target'].astype(int)
    # Keep a handle on the frame that still carries the target column.
    test1 = week
    week = week[feature_columns]

    if a == 'rf':
        # Random forest classifier
        rf = RandomForestClassifier(n_estimators=100)
        rf = rf.fit(data, target)
        predictions = rf.predict(week)
        cal_score("RANDOM FOREST", rf, predictions, test1['target'])
    elif a == "sgd":
        # SGD classifier
        clf = SGDClassifier(alpha=0.0001, class_weight=None, epsilon=0.1, eta0=0.0,
            fit_intercept=True, l1_ratio=0.15, learning_rate='optimal',
            loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5,
            random_state=None, shuffle=True, verbose=0,
            warm_start=False)
        clf.fit(data, target)
        predictions = clf.predict(week)
        cal_score("SGD Regression", clf, predictions, test1['target'])
    elif a == "nb":
        # Gaussian naive Bayes
        clf = GaussianNB()
        clf.fit(data, target)
        predictions = clf.predict(week)
        cal_score("NAIVE BAYES", clf, predictions, test1['target'])
开发者ID:mmeoni,项目名称:LHCDataAnalysis,代码行数:33,代码来源:classifier.py

示例4: classify_reviews

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def classify_reviews():
    """Compare naive Bayes and SGD classifiers on the review data set.

    The data comes from ``gen_training_data.gen_data()`` and is vectorised
    with ``featurizer.tfidf``. Three results are printed in order:

    1. the number of test reviews a ``MultinomialNB`` model gets wrong,
    2. the test accuracy of an ``SGDClassifier`` on the raw TF-IDF features,
    3. the test accuracy of the same classifier refitted on TF-IDF features
       built from the stemmed data.

    Returns:
        None.
    """
    import featurizer
    import gen_training_data
    import numpy as np
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.linear_model import SGDClassifier

    data = gen_training_data.gen_data()
    stemmed_data = featurizer.stem(data)
    tfidf = featurizer.tfidf(data)

    # Baseline: multinomial naive Bayes.
    clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels'])
    predicted = clf.predict(tfidf['test_tfidf'])
    num_wrong = sum(
        1 for expected, guessed in zip(data['testing_labels'], predicted)
        if expected != guessed)
    # Apply the format explicitly; passing the value as a second argument
    # to print() never substitutes it into the string.
    print("num_wrong: %d" % num_wrong)

    # Linear SVM trained with SGD on the unstemmed features.
    sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42)
    sgd_clf.fit(tfidf['train_tfidf'], data['training_labels'])
    sgd_pred = sgd_clf.predict(tfidf['test_tfidf'])
    print(np.mean(sgd_pred == data['testing_labels']))

    # Same classifier, refitted on the stemmed features.
    stem_tfidf = featurizer.tfidf(stemmed_data)
    sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels'])
    sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf'])
    print(np.mean(sgd_stem_prd == data['testing_labels']))
开发者ID:JT17,项目名称:445Project,代码行数:31,代码来源:classifier.py

示例5: sgd_classifier

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def sgd_classifier(V_train, y_train, V_val, y_val, V_test, y_test):
    """Fit an ``SGDClassifier`` and print validation and test metrics.

    Args:
        V_train: Training feature matrix.
        y_train: Training labels.
        V_val: Validation feature matrix.
        y_val: Validation labels.
        V_test: Test feature matrix.
        y_test: Test labels.

    Returns:
        None. The training time, the validation accuracy and a
        classification report for the test set are printed.
    """
    t0 = time.time()

    # The progress messages used to say "Random Forest", a leftover from a
    # sibling routine; this function trains an SGD model.
    print('Building SGD model')

    clf = SGDClassifier(n_iter=50)
    clf.fit(V_train, y_train)

    t1 = time.time()
    # Elapsed seconds truncated to two decimals.
    print('Building SGD model ... Done %s' % (int((t1 - t0) * 100) / 100.))
    print('')

    p_val = clf.predict(V_val)
    print('Training accuracy on validation set %s' % accuracy_score(y_val, p_val))

    p_test = clf.predict(V_test)
    print('Accuracy on testing set')
    print(classification_report(y_test, p_test))
开发者ID:HACP,项目名称:RHETORICS,代码行数:29,代码来源:MLlib.py

示例6: scikit_GDS

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def scikit_GDS(x,y, X_test,y_test=None, prevmodel="yes", output=False):
    """Predict with an SGD classifier, training or reloading it first.

    Args:
        x: Training feature matrix; only used when a new model is trained.
        y: Training labels; only used when a new model is trained.
        X_test: Feature matrix to predict on.
        y_test: True labels for ``X_test``, indexable by position. Required
            when ``output`` is ``False``.
        prevmodel: ``"yes"`` loads the model from ``trained_GDS_model.pkl``;
            any other value trains a new model on ``x``/``y`` and saves it
            to that file.
        output: When ``False`` the accuracy on ``X_test`` is printed and
            nothing is returned; otherwise the predictions are returned.

    Returns:
        The predictions for ``X_test`` when ``output`` is set, else ``None``.

    Raises:
        ValueError: If accuracy is requested but ``y_test`` is ``None``.
    """
    from sklearn.linear_model import SGDClassifier
    from sklearn.externals import joblib

    if prevmodel != "yes":
        clf = SGDClassifier(loss="hinge", penalty="l2")
        # The training data arrives as the lowercase parameter ``x``; the
        # original referred to an undefined name ``X`` here.
        clf.fit(x, y)
        joblib.dump(clf, 'trained_GDS_model.pkl')
    else:
        # NOTE: joblib files are pickles; only load model files you trust.
        clf = joblib.load('trained_GDS_model.pkl')

    predictions = clf.predict(X_test)

    # Equality (not truthiness) is kept so that non-bool values of
    # ``output`` select the same branch as before.
    if output == False:
        if y_test is None:
            raise ValueError("y_test is required when output is False")
        correctcount = sum(
            1 for index, each in enumerate(predictions)
            if y_test[index] == each)
        totalcount = len(predictions)
        print(str(correctcount) + " / " + str(totalcount) + " = " + str(float(correctcount) / totalcount))
    else:
        return predictions
开发者ID:yongbin999,项目名称:kaggle_whats-cooking,代码行数:27,代码来源:scikit_models.py

示例7: main

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def main(which='NB'):
    """Train a sentiment classifier on ``dat/train.tsv`` and try it out.

    The module-level ``global_features`` is rebuilt from the training data
    and the module-level ``adjectives`` list is extended with the lines of
    the file ``adj``.

    Args:
        which: ``'NB'`` trains an NLTK naive Bayes classifier, pickles it to
            ``classifier.pickle`` and then classifies phrases read
            interactively until ``q`` is entered. ``'SGD'`` trains an
            ``SGDClassifier`` on the feature values, pickles it to
            ``sgd_sent.pickle`` and prints predictions for two hard-coded
            phrases. Any other value only loads the data.

    Returns:
        None.
    """
    global global_features
    global adjectives

    print('reading training data')
    training_data, phrase_to_id = read_data(source='dat/train.tsv')

    print('getting features')
    global_features = get_features(get_all_words(training_data))
    with open('adj', 'r') as adj_file:
        for adj in adj_file:
            adjectives.append(adj.lower().rstrip())

    print('entering switch')
    if which == 'NB':
        training_set = nltk.classify.util.apply_features(extract_features, get_phrase_list(training_data, True))
        print('moving to classifier creation')
        start = time.clock()
        classifier = nltk.NaiveBayesClassifier.train(training_set)
        print('classfier total time:  ' + str(time.clock() - start))

        # Pickles are binary data: write in binary mode and make sure the
        # file is closed (the original leaked a text-mode handle).
        with open('classifier.pickle', 'wb') as model_file:
            pickle.dump(classifier, model_file)

        # Interactive loop: classify phrases until the user enters 'q'.
        text = raw_input('Next test (q to quit):')
        while text != 'q':
            print(classifier.classify(extract_features(text.split())))
            text = raw_input('Next test (q to quit):')
    elif which == 'SGD':
        print('extracting features')
        training_set = nltk.classify.util.apply_features(extract_features, get_phrase_list(training_data))
        # One row of feature values per training item, in the iteration
        # order of each feature mapping.
        training_list = [[v for _, v in d.iteritems()] for d in training_set]
        label_set = [int(tup[1]) for tup in training_data]

        print('moving to classifier creation')
        clf = SGDClassifier(loss="hinge", penalty="l2")
        print('moving to training')
        clf.fit(training_list, label_set)
        with open('sgd_sent.pickle', 'wb') as model_file:
            pickle.dump(clf, model_file)

        print('moving to prediction')
        pred = ['i hate everyhing', 'i love everything']
        pred_set = nltk.classify.util.apply_features(extract_features, pred)
        print(pred_set)
        pred_list = [[v for _, v in d.iteritems()] for d in pred_set]
        print(pred_list)
        print(clf.predict(pred_list))
开发者ID:ARKilgore,项目名称:nlp_utils,代码行数:58,代码来源:sentiment.py

示例8: SGD

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
class SGD(CrossDomainClassifier):
    """
    Stochastic Gradient Descent with Tfidf

    The classifier is stored in ``self.clf`` by :meth:`train`. Every
    evaluation method also stores the confusion matrix of its latest
    prediction in ``self.cm``.
    """
    def train(self, limit_data=None):
        """Fit the classifier on the first ``limit_data`` loaded reviews.

        Prints a message and returns without training when no reviews have
        been loaded. ``limit_data=None`` uses all reviews.
        """
        if not hasattr(self, 'reviews'):
            print("No data loaded")
            return

        if limit_data is None:
            limit_data = len(self.reviews)

        X = self.get_bag_of_ngrams(self.reviews[:limit_data])
        self.clf = SGDClassifier(loss="modified_huber", alpha=0.001, penalty="l2").fit(X, self.labels[:limit_data])

    def __predict(self, reviews, labels):
        """Vectorise *reviews*, predict them and refresh ``self.cm``.

        Returns the predicted labels.
        """
        counts = self.count_vect.transform(reviews)
        tfidf = self.tfidf_transformer.transform(counts)

        predicted = self.clf.predict(tfidf)
        self.cm = confusion_matrix(labels, predicted)
        return predicted

    def __test(self, reviews, labels):
        """Return the error rate (1 - accuracy) on *reviews*."""
        predicted = self.__predict(reviews, labels)
        return 1 - np.mean(predicted == labels)

    def get_training_error(self):
        """Return the error rate on the training reviews."""
        return self.__test(self.reviews, self.labels)

    def get_generalized_error(self):
        """Return the error rate on the held-out test reviews."""
        return self.__test(self.test_reviews, self.test_labels)

    def get_crossdomain_error(self):
        """Return the error rates on the twitter and ebay data, by name."""
        return {'twitter': self.__test(self.twitter_items, self.twitter_labels),
                'ebay': self.__test(self.ebay_items, self.ebay_labels)}

    def __get_scores(self, reviews, labels):
        """Return macro-averaged precision, recall, F-score and support."""
        predicted = self.__predict(reviews, labels)
        return precision_recall_fscore_support(labels, predicted, average='macro')

    def get_scores_training(self):
        """Return the macro-averaged scores on the training reviews."""
        return self.__get_scores(self.reviews, self.labels)

    def get_scores_test(self):
        """Return the macro-averaged scores on the held-out test reviews."""
        return self.__get_scores(self.test_reviews, self.test_labels)

    def get_scores_twitter(self):
        """Return the macro-averaged scores on the twitter data."""
        return self.__get_scores(self.twitter_items, self.twitter_labels)

    def get_scores_ebay(self):
        """Return the macro-averaged scores on the ebay data."""
        return self.__get_scores(self.ebay_items, self.ebay_labels)
开发者ID:lukedeo,项目名称:cross-domain,代码行数:56,代码来源:classifier.py

示例9: chi_feature_select

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def chi_feature_select(train_file, test_file):
    """Train a TF-IDF + SGD text classifier and print a few predictions.

    Both files are read with ``read_text_src``; rows with fewer than two
    fields are dropped, field 0 is the label and field 1 the text. After
    fitting on the training texts the function prints the training matrix
    shape and the class probabilities of the first test document, then
    updates the model with two hard-coded samples via ``partial_fit`` and
    prints its predictions for two hard-coded headlines.

    Args:
        train_file: Path of the training data.
        test_file: Path of the test data.

    Returns:
        None.
    """
    def _load(path):
        # Keep only rows that carry both a label and a text.
        rows = [row for row in read_text_src(path) if len(row) > 1]
        texts = [row[1] for row in rows]
        labels = [row[0] for row in rows]
        return texts, labels

    train_texts, y_train = _load(train_file)
    test_texts, y_test = _load(test_file)

    vectorizer = TfidfVectorizer(tokenizer=zh_tokenize)
    X_train = vectorizer.fit_transform(train_texts)
    print(X_train.shape)
    X_test = vectorizer.transform(test_texts)

    # A chi-squared feature selection step (SelectKBest keeping 20% of the
    # features) and a LinearSVC variant used to live here; both are disabled
    # and the model is trained on the full TF-IDF matrix.
    clf = SGDClassifier(loss="log", penalty='l1')
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    prob = clf.predict_proba(X_test[0])
    print(prob)

    # Incrementally update the fitted model with two extra labelled samples.
    extra_texts = ["市场经济复苏,互联网公司蓬勃发展", "世纪大战终于开启,勇士引得第73胜"]
    extra_labels = ['1', '0']
    extra_matrix = vectorizer.transform(extra_texts)
    clf.partial_fit(extra_matrix, extra_labels, classes=['0', '1'])

    # Predict two headlines with the updated model.
    headlines = ['暴风科技股价大跌', "世纪大战终于开启,勇士引得第73胜"]
    headline_matrix = vectorizer.transform(headlines)
    pred = clf.predict(headline_matrix)
    print(pred)
开发者ID:actlea,项目名称:TopicalCrawler,代码行数:54,代码来源:build_dict.py

示例10: solve

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def solve(exp, X_train, y_train, X_test, y_test, seed):
    """Fit the model described by *exp* and return its predictions.

    Both feature arrays are flattened to two dimensions (one row per
    sample). ``exp["loss"] == "softmax"`` delegates to ``softmax_gn``; any
    other loss is handed to an ``SGDClassifier``.

    Args:
        exp: Experiment settings; the keys ``"loss"``, ``"reg"`` and
            ``"verbose"`` are read.
        X_train: Training features, reshaped to ``(n_samples, -1)``.
        y_train: Training labels.
        X_test: Test features, reshaped to ``(n_samples, -1)``.
        y_test: Test labels; only forwarded to ``softmax_gn``.
        seed: Not used by this function.

    Returns:
        A tuple ``(y_train_pred, y_test_pred)``.
    """
    train_flat = X_train.reshape(X_train.shape[0], -1)
    test_flat = X_test.reshape(X_test.shape[0], -1)
    loss, reg, verbose = exp["loss"], exp["reg"], exp["verbose"]

    if loss == "softmax":
        # NOTE(review): verbosity is hard-coded here rather than taken from
        # exp["verbose"] -- confirm this is intentional.
        train_pred, test_pred = softmax_gn(
            train_flat, y_train, test_flat, y_test, reg, verbose=True)
        return train_pred, test_pred

    model = SGDClassifier(loss=loss, random_state=RANDOM_STATE, alpha=reg,
                          verbose=int(verbose))
    model.fit(train_flat, y_train)
    train_pred = model.predict(train_flat)
    test_pred = model.predict(test_flat)
    return train_pred, test_pred
开发者ID:akmorrow13,项目名称:ckm,代码行数:16,代码来源:experiments.py

示例11: run_regression

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def run_regression(train_embeds, train_labels, test_embeds, test_labels):
    """Fit a log-loss SGD classifier on embeddings and print its accuracy.

    NumPy's global random generator is seeded with 1 first. Three accuracy
    figures are printed, each under its own heading: the classifier on the
    test set, the classifier on the training set, and a ``DummyClassifier``
    baseline on the test set.

    Args:
        train_embeds: Training feature matrix.
        train_labels: Training labels.
        test_embeds: Test feature matrix.
        test_labels: Test labels.

    Returns:
        None.
    """
    np.random.seed(1)
    from sklearn.dummy import DummyClassifier
    from sklearn.linear_model import SGDClassifier
    from sklearn.metrics import accuracy_score

    baseline = DummyClassifier()
    baseline.fit(train_embeds, train_labels)
    classifier = SGDClassifier(loss="log", n_jobs=55)
    classifier.fit(train_embeds, train_labels)

    # (heading, fitted model, features, true labels) in reporting order.
    reports = (
        ("Test scores", classifier, test_embeds, test_labels),
        ("Train scores", classifier, train_embeds, train_labels),
        ("Random baseline", baseline, test_embeds, test_labels),
    )
    for heading, model, embeds, labels in reports:
        print(heading)
        print(accuracy_score(labels, model.predict(embeds)))
开发者ID:hammadhaleem,项目名称:FastGCN,代码行数:17,代码来源:train_batch_multiRank_inductive_reddit_onelayer.py

示例12: SGDCls

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
class SGDCls(object):
    """Thin wrapper around ``SGDClassifier`` that logs errors instead of raising.

    Every method catches ordinary exceptions, prints the traceback and
    returns ``None``. The most recent training data and prediction are kept
    on the instance.
    """
    def __init__(self):
        self.sgd_cls = SGDClassifier()
        # Result of the latest predict() call.
        self.prediction = None
        # Data passed to the latest train_model() call.
        self.train_x = None
        self.train_y = None

    def train_model(self, train_x, train_y):
        """Store the training data, print the labels and fit the classifier."""
        try:
            self.train_x = train_x
            self.train_y = train_y
            print(self.train_y)
            self.sgd_cls.fit(train_x, train_y)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            print(traceback.format_exc())

    def predict(self, test_x):
        """Predict *test_x*, remember the result and return it.

        Returns ``None`` if prediction fails.
        """
        try:
            self.prediction = self.sgd_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        """Score the latest prediction against *test_y*.

        NOTE(review): despite the name this returns ``r2_score`` rather than
        a classification accuracy -- confirm which metric callers expect.
        Returns ``None`` if scoring fails.
        """
        try:
            return r2_score(test_y, self.prediction)
        except Exception:
            print(traceback.format_exc())
开发者ID:obaid22192,项目名称:machine-learning,代码行数:31,代码来源:classifiers.py

示例13: runSGDPipeline

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def runSGDPipeline(entries, langs):
    """Fit a count/TF-IDF/SGD language classifier and report training metrics.

    The vectoriser, TF-IDF transformer and classifier are fitted step by
    step on *entries* and evaluated on the same data. The accuracy, the
    classification report, the confusion matrix, the elapsed time and the
    feature matrix shape are printed.

    Args:
        entries: Text samples.
        langs: Language label of each sample, aligned with ``entries``.

    Returns:
        A ``Pipeline`` configured with the same three steps. It is NOT
        fitted by this function; the caller has to fit it before use.
    """
    t0 = time()
    sgd_pipeline = Pipeline([('vect', CountVectorizer(ngram_range=(1,1), max_features=n_features)),
                      ('tfidf', TfidfTransformer(use_idf=True)),
                      ('clf', SGDClassifier(loss='squared_hinge', penalty='l2',
                                            alpha=0.001, n_iter=5, random_state=42))])

    vect = CountVectorizer(ngram_range=(1,1), max_features=n_features)
    X_train_counts = vect.fit_transform(entries)
    # fit_transform already fits the transformer; a separate fit() first is
    # redundant.
    tfidf = TfidfTransformer(use_idf=True)
    X_train_tfidf = tfidf.fit_transform(X_train_counts)

    clf = SGDClassifier(loss='squared_hinge', penalty='l2', alpha=0.001, n_iter=5, random_state=42)
    clf.fit(X_train_tfidf, langs)

    # The model is evaluated on its own training data, so the matrix built
    # above is reused instead of vectorising ``entries`` a second time and
    # converting the result to a dense array.
    predicted = clf.predict(X_train_tfidf)

    print(np.mean(predicted == langs))
    # ``target_names`` must hold one name per class. Passing the per-sample
    # ``langs`` list mislabels the report rows, so the report falls back to
    # the class labels themselves.
    print(metrics.classification_report(langs, predicted))
    print(metrics.confusion_matrix(langs, predicted))
    print("Took %s seconds." % (time()-t0))
    print("n_samples: %d, n_features: %d" % X_train_tfidf.shape)
    return sgd_pipeline
开发者ID:squidnee,项目名称:lingo-bean,代码行数:27,代码来源:baselineClassifications.py

示例14: kernelsvm

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
class kernelsvm():
    """Approximate RBF-kernel classifier: Nystroem features plus SGD.

    ``fit`` maps the inputs through a ``General_Nystroem`` feature map and
    trains an ``SGDClassifier`` on the mapped features.
    """

    def __init__(self, theta0, alpha, loss_metric):
        """Store the hyper-parameters.

        Args:
            theta0: Passed as ``gamma`` to the Nystroem feature map.
            alpha: Passed as ``alpha`` to the ``SGDClassifier``.
            loss_metric: Passed as ``loss`` to the ``SGDClassifier``.
        """
        self.theta0 = theta0
        self.alpha = alpha
        self.loss_metric = loss_metric

    def fit(self, X, y, idx_SR):
        """Fit the feature map and the classifier.

        The feature map gets one component per entry of ``idx_SR``, which is
        also forwarded to its ``fit_transform``.
        """
        self.feature_map_nystroem = General_Nystroem(
            kernel='rbf', gamma=self.theta0, n_components=len(idx_SR))
        mapped = self.feature_map_nystroem.fit_transform(X, idx_SR)
        print("fitting SGD")
        self.clf = SGDClassifier(loss=self.loss_metric, alpha=self.alpha)
        self.clf.fit(mapped, y)
        print("fitting SGD finished")

    def predict(self, X):
        """Return ``(predictions, transformed_X)`` for the raw inputs *X*."""
        print("Predicting")
        mapped = self.feature_map_nystroem.transform(X)
        labels = self.clf.predict(mapped)
        return labels, mapped

    def decision_function(self, X):
        """Return the classifier's decision values.

        *X* must already be transformed by the feature map, e.g. the second
        value returned by :meth:`predict`.
        """
        return self.clf.decision_function(X)

    def err_rate(self, y_true, y_pred):
        """Return the error rate, i.e. ``1.0`` minus the accuracy."""
        return 1.0 - accuracy_score(y_true, y_pred)

    def get_params(self):
        """Return the parameters of the underlying classifier."""
        return self.clf.get_params()
开发者ID:Zheng-JIA,项目名称:kernelsubsampling,代码行数:28,代码来源:svm.py

示例15: SGD_lassifier

# 需要导入模块: from sklearn.linear_model import SGDClassifier [as 别名]
# 或者: from sklearn.linear_model.SGDClassifier import predict [as 别名]
def SGD_lassifier(X_train, categories, X_test, test_categories):
    """Fit an ``SGDClassifier`` and print its classification report.

    Args:
        X_train: Training feature matrix.
        categories: Training labels.
        X_test: Test feature matrix.
        test_categories: True labels of the test samples.

    Returns:
        None. A heading and the classification report for the test set are
        printed.
    """
    from sklearn.linear_model import SGDClassifier
    # Import the function that is actually used, so the block no longer
    # depends on a module-level ``metrics`` name it never imported.
    from sklearn.metrics import classification_report

    clf = SGDClassifier(alpha=.0001, n_iter=50).fit(X_train, categories)
    y_pred = clf.predict(X_test)
    print('\n Here is the classification report for SGD classifier:')
    print(classification_report(test_categories, y_pred))
开发者ID:LewkowskiArkadiusz,项目名称:magisterka,代码行数:9,代码来源:train.py


注:本文中的sklearn.linear_model.SGDClassifier.predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。