当前位置: 首页>>代码示例>>Python>>正文


Python linear_model.SGDClassifier类代码示例

本文整理汇总了Python中sklearn.linear_model.SGDClassifier的典型用法代码示例。如果您正苦于以下问题:Python SGDClassifier类的具体用法?Python SGDClassifier怎么用?Python SGDClassifier使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了SGDClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: sgd_classifier

def sgd_classifier(V_train, y_train, V_val, y_val, V_test, y_test):
    """Fit an SGD classifier and print its validation/test performance.

    Trains ``SGDClassifier(n_iter=50)`` on the training split, prints the
    time spent fitting, the accuracy on the validation split and a
    classification report for the test split.  Nothing is returned.

    :param V_train: feature matrix used for fitting
    :param y_train: labels matching ``V_train``
    :param V_val: feature matrix of the validation split
    :param y_val: labels matching ``V_val``
    :param V_test: feature matrix of the test split
    :param y_test: labels matching ``V_test``
    """
    # NOTE(review): ``n_iter`` only exists in older scikit-learn releases
    # (newer ones use ``max_iter``) -- confirm the pinned version.
    t0 = time.time()
    print('Building SGD classifier model')

    clf = SGDClassifier(n_iter=50)
    clf.fit(V_train, y_train)

    t1 = time.time()
    # Elapsed seconds, truncated (not rounded) to two decimals.
    print('Building SGD classifier model ... Done %s'
          % (int((t1 - t0) * 100) / 100.))
    print('')

    p_val = clf.predict(V_val)
    print('Accuracy on validation set %s' % accuracy_score(y_val, p_val))

    p_test = clf.predict(V_test)
    print('Accuracy on testing set')
    print(classification_report(y_test, p_test))
开发者ID:HACP,项目名称:RHETORICS,代码行数:27,代码来源:MLlib.py

示例2: run_online_classifier

def run_online_classifier():
    """Train a logistic SGD model out-of-core on streamed documents.

    Documents are pulled from ``datasets/movie_data.csv`` via
    ``stream_docs``.  Up to 45 mini-batches of 1000 documents are hashed
    and fed to ``partial_fit``; the next 5000 documents are used to print
    the test accuracy and are then folded into the model as well.
    """
    hasher = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    model = SGDClassifier(loss='log', random_state=1, n_iter=1)

    stream = stream_docs(path=os.path.join('datasets', 'movie_data.csv'))
    label_set = np.array([0, 1])

    batches_left = 45
    while batches_left > 0:
        batch_docs, batch_labels = get_minibatch(stream, size=1000)
        # A ``None`` batch ends training early.
        if batch_docs is None:
            break
        batch_features = hasher.transform(batch_docs)
        model.partial_fit(batch_features, batch_labels, classes=label_set)
        batches_left -= 1

    held_docs, held_labels = get_minibatch(stream, size=5000)
    held_features = hasher.transform(held_docs)
    print("Test accuracy: %.3f" % model.score(held_features, held_labels))

    # Finally let the model also learn from the held-out documents.
    model = model.partial_fit(held_features, held_labels)
开发者ID:jeremyn,项目名称:python-machine-learning-book,代码行数:26,代码来源:chapter_8.py

示例3: test_underflow_or_overlow

def test_underflow_or_overlow():
    """Check SGDClassifier on hugely unscaled versus min-max scaled data.

    With NumPy floating-point errors promoted to exceptions, fitting on
    the scaled data must yield finite coefficients, while fitting on the
    raw data must raise a ``ValueError`` whose message suggests scaling.
    """
    with np.errstate(all="raise"):
        random_state = np.random.RandomState(0)
        row_count = 100
        col_count = 10

        # Blow the first two columns up by 1e300 while staying finite.
        raw = random_state.normal(size=(row_count, col_count))
        raw[:, :2] *= 1e300
        assert_true(np.isfinite(raw).all())

        # MinMaxScaler is used because a naive standard deviation cannot
        # be computed on values of this magnitude.
        scaled = MinMaxScaler().fit_transform(raw)
        assert_true(np.isfinite(scaled).all())

        # Labels come from a random linear rule applied to the scaled data.
        true_weights = random_state.normal(size=col_count)
        target = (np.dot(scaled, true_weights) > 0.0).astype(np.int32)
        assert_array_equal(np.unique(target), [0, 1])

        clf = SGDClassifier(alpha=0.1, loss="squared_hinge", n_iter=500)

        # Smoke test: the fit is stable on scaled data.
        clf.fit(scaled, target)
        assert_true(np.isfinite(clf.coef_).all())

        # The same model must report the instability on unscaled data.
        expected_msg = (
            r"Floating-point under-/overflow occurred at epoch #.*"
            " Scaling input data with StandardScaler or MinMaxScaler"
            " might help."
        )
        assert_raises_regexp(ValueError, expected_msg, clf.fit, raw, target)
开发者ID:richlewis42,项目名称:scikit-learn,代码行数:35,代码来源:test_sgd.py

示例4: __init__

class LightModel:
    """Logistic-loss SGDClassifier trained and evaluated batch by batch."""

    def __init__(self,learningRate, numEpochs, ppenalty="l1", mustShuffle=True):
        """Create the underlying scikit-learn classifier.

        :param learningRate: forwarded to SGDClassifier as ``alpha``
        :param numEpochs: forwarded to SGDClassifier as ``n_iter``
        :param ppenalty: forwarded as ``penalty`` (default ``"l1"``)
        :param mustShuffle: forwarded as ``shuffle``
        """
        self.Classifier = SGDClassifier(penalty=ppenalty, loss='log', alpha=learningRate, n_iter = numEpochs, shuffle=mustShuffle)

    @staticmethod
    def _batch_size(batch):
        """Return the number of rows in ``batch``.

        Uses ``shape[0]`` when available because ``len()`` raises on
        scipy sparse matrices; falls back to ``len()`` otherwise.
        """
        shape = getattr(batch, "shape", None)
        return shape[0] if shape else len(batch)

    def train(self, gen,  v=False):
        """Call ``partial_fit`` once per ``(x, y)`` batch yielded by ``gen``.

        :param gen: iterable of ``(x, y)`` batches
        :param v: when true, print a timestamped running example count
        """
        seen = 0
        for x, y in gen: #For each batch
            self.Classifier.partial_fit(x, y, [0,1])
            seen += self._batch_size(x)
            if v : print(str(datetime.now())[:-7] , "example:", seen)

    def test(self, gen,  v=False):
        """Predict class-1 probabilities for every batch yielded by ``gen``.

        :param gen: iterable of ``(x, y)`` batches
        :param v: when true, print progress and the final score
        :return: tuple ``(ytot, ptot)`` of stacked targets and predictions
        """
        # Collect per-batch arrays and stack once at the end; stacking
        # inside the loop re-copies everything seen so far on each batch.
        # The leading empty arrays keep the result shape/dtype of the
        # incremental version (also for an empty generator).
        target_chunks = [np.array([])]
        prob_chunks = [np.array([])]
        seen = 0
        for x, y in gen:
            proba = self.Classifier.predict_proba(x)
            target_chunks.append(y)
            prob_chunks.append(proba[:, 1]) #Probability of class 1
            seen += y.shape[0]
            if v : print(str(datetime.now())[:-7] , "example:", seen)
        ytot = np.hstack(target_chunks)
        ptot = np.hstack(prob_chunks)
        if v: print("Score:", self.score(ytot, ptot))

        return (ytot, ptot)

    def score(self, target, prediction):
        """Return ``llfun(target, prediction)``."""
        return llfun(target, prediction)
开发者ID:EtienneDesticourt,项目名称:Kaggle-Avazu,代码行数:31,代码来源:LightModel.py

示例5: validate

def validate():
    """
    Runs a 10-fold cross validation on the classifier, reporting
    accuracy.

    Reads ``../NewData/train.csv``, fits a logistic-loss SGDClassifier on
    each training fold and prints the per-fold accuracy followed by the
    average over all folds.  Nothing is returned.
    """
    trainDf = pd.read_csv("../NewData/train.csv")
    X = np.matrix(pd.DataFrame(trainDf, index=None,
        columns=["invited", "user_reco", "evt_p_reco", "evt_c_reco",
                 "user_pop", "frnd_infl", "evt_pop"]))
    y = np.array(trainDf.interested)
    nrows = len(trainDf)
    # NOTE(review): assumes the legacy iterable ``KFold(n, n_folds)`` API.
    kfold = KFold(nrows, 10)
    fold_accuracies = []
    for run, (train, test) in enumerate(kfold):
        clf = SGDClassifier(loss="log", penalty="l2")
        clf.fit(X[train], y[train])
        # Predict the whole fold at once instead of one row at a time.
        predicted = np.asarray(clf.predict(X[test])).ravel()
        # float() forces true division; under Python 2 the int/int
        # division truncated every fold accuracy to 0 or 1.
        accuracy = float(np.sum(predicted == y[test])) / len(test)
        print("accuracy (run %d): %f" % (run, accuracy))
        fold_accuracies.append(accuracy)
    print("Average accuracy %s"
          % (sum(fold_accuracies) / len(fold_accuracies)))
开发者ID:ChrisBg,项目名称:mlia-examples,代码行数:29,代码来源:RecoWeights.py

示例6: do_classify

def do_classify():
    """Train an SGD hinge-loss classifier on the TF-IDF corpus and score it.

    The fitted model is dumped to ``sgdc_clf.pkl`` under ``HERE``; the
    documents whose ids are listed in ``corpus.test_y`` are then
    classified and their F1 score is printed.
    """
    corpus = MyCorpus()
    weighted_corpus = tfidf_model[corpus]
    vocab_size = len(corpus.dictionary)
    # Transposed so that rows are documents -- presumably corpus2csc
    # returns terms x documents; verify against gensim.
    train_matrix = matutils.corpus2csc(weighted_corpus, vocab_size).transpose(copy=False)

    regularizer = "l2"
    classifier = SGDClassifier(loss="hinge", penalty=regularizer, alpha=0.0001, n_iter=50, fit_intercept=True)
    # NOTE(review): cls_y is read only after the corpus was converted
    # above; keep this order in case it is filled during iteration.
    targets = np.array(corpus.cls_y)
    classifier.fit(train_matrix, targets)

    model_path = os.path.join(HERE, "sgdc_clf.pkl")
    joblib.dump(classifier, model_path, compress=9)
    print("train completely")

    held_out_tokens = []
    held_out_labels = []
    for doc in SogouCorpus.objects.filter(id__in=corpus.test_y):
        held_out_tokens.append(doc.tokens)
        held_out_labels.append(cls_ids[doc.classify])

    # Tokens are stored as one comma-separated string per document.
    bags = [dictionary.doc2bow(tokens.split(",")) for tokens in held_out_tokens]
    weighted_bags = tfidf_model[bags]
    test_matrix = matutils.corpus2csc(weighted_bags, vocab_size).transpose(copy=False)
    predicted = classifier.predict(test_matrix)
    f1 = metrics.f1_score(held_out_labels, predicted)
    print("f1-score:   %0.3f" % f1)
开发者ID:jannson,项目名称:Similar,代码行数:33,代码来源:summ.py

示例7: classify_reviews

def classify_reviews():
    """Compare Naive Bayes and SGD classifiers on TF-IDF review features.

    Prints, in order: the number of test reviews misclassified by
    MultinomialNB, the SGD accuracy on the plain TF-IDF features, and the
    SGD accuracy on the TF-IDF features of the stemmed data.
    """
    import featurizer
    import gen_training_data
    import numpy as np
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.linear_model import SGDClassifier

    data = gen_training_data.gen_data()
    # Call order kept as in the original (stem before tfidf) in case
    # either helper modifies ``data``.
    stemmed_data = featurizer.stem(data)
    tfidf = featurizer.tfidf(data)

    clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels'])
    predicted = clf.predict(tfidf['test_tfidf'])
    num_wrong = sum(
        1
        for expected, guessed in zip(data['testing_labels'], predicted)
        if expected != guessed
    )
    # The count is interpolated with ``%``; passing it as a second print
    # argument left the literal "%d" in the output.
    print("num_wrong: %d" % num_wrong)

    sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42)
    sgd_clf.fit(tfidf['train_tfidf'], data['training_labels'])
    sgd_pred = sgd_clf.predict(tfidf['test_tfidf'])
    print(np.mean(sgd_pred == data['testing_labels']))

    # Refit the same estimator on the stemmed features.
    stem_tfidf = featurizer.tfidf(stemmed_data)
    sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels'])
    sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf'])
    print(np.mean(sgd_stem_prd == data['testing_labels']))
开发者ID:JT17,项目名称:445Project,代码行数:29,代码来源:classifier.py

示例8: train

def train(docs, labels, regu=1, bg_weight=.1):
    '''
    Fit a logistic-loss SGD model on the given documents.

    Sample weights are computed from the original labels; afterwards
    label 0 is merged into label 1 before fitting.

    :param docs: iterator of (title, body) pairs
    :param labels: integer labels for docs (0 is weakly-negative)
    :param regu: regularisation numerator; ``alpha`` is ``regu / len(labels)``
    :param bg_weight: forwarded to ``balance_weights``
    :return: model, as a dict with keys ``idf``, ``logreg`` and ``lda``
    '''
    # Materialised as a list because it is consumed twice below.
    feas = [extract_words(doc) for doc in docs]
    # np.array(list(...)) already yields a private copy that is safe to
    # modify in place.
    labels = np.array(list(labels), dtype=int)
    idf = train_idf(feas)
    X, _vocab = extract_feas(feas, idf)
    # set up sample weights (from the labels before 0 is merged into 1)
    weights = balance_weights(labels, bg_weight)
    labels[labels == 0] = 1
    # float() forces true division: with the default integer ``regu`` the
    # Python 2 int/int division truncated alpha to 0.
    model = SGDClassifier(loss='log',
                          alpha=float(regu) / len(labels),
                          fit_intercept=True,
                          n_iter=100,
                          shuffle=True)
    model.fit(X, labels, sample_weight=weights)
    return dict(idf=idf, logreg=model, lda=None)
开发者ID:jseppanen,项目名称:textpile,代码行数:25,代码来源:model.py

示例9: crossvalidate

def crossvalidate(feas, labels, param):
    labels = np.array(list(labels), dtype=int)
    accs = []
    for train_ids, valid_ids in StratifiedKFold(labels, 10):
        idf=train_idf([feas[i] for i in train_ids])
        X,vocab=extract_feas(feas, idf)
        #lda=train_lda(X, vocab, num_topics)
        #X=transform_lda(X, lda)
        labels_train = labels[train_ids].copy()
        weights = balance_weights(labels_train, param['bg_weight'])
        labels_train[labels_train == 0] = 1
        model=SGDClassifier(loss='log',
                            alpha=param['regu']/len(labels_train),
                            fit_intercept=True,
                            shuffle=True, n_iter=50)
        model.fit(X[train_ids], labels_train, sample_weight=weights)
        pp = model.predict_proba(X[valid_ids])
        pred_labels = np.argmax(pp, 1)
        pred_labels = model.classes_[pred_labels]
        #a=accuracy(labels[valid_ids], pred_labels, 1)
        # return all scores for "good" class
        assert model.classes_[1] == 2
        pred_scores = pp[:,1]
        a=avg_precision(labels[valid_ids], pred_scores)
        print '%.2f' % a,
        accs.append(a)
    return np.mean(accs)
开发者ID:jseppanen,项目名称:textpile,代码行数:27,代码来源:model.py

示例10: plot_sgd_classifier

def plot_sgd_classifier(num_samples, clt_std):
    """Plot two blobs and the decision contours of a logistic SGD model.

    Draws the contour lines of the decision function at -1, 0 and 1
    (dashed, solid, dashed) over a 10x10 grid spanning the data range,
    together with a scatter plot of the samples coloured by class.

    :param num_samples: number of samples passed to ``make_blobs``
    :param clt_std: cluster standard deviation passed to ``make_blobs``
    """
    # generation of data
    X, y = make_blobs(n_samples=num_samples, centers=2, cluster_std=clt_std)

    # fitting of data using logistic regression
    clf = SGDClassifier(loss='log', alpha=0.01)
    clf.fit(X, y)

    # grid spanning the observed range of both features
    x_ = np.linspace(X[:, 0].min(), X[:, 0].max(), 10)
    y_ = np.linspace(X[:, 1].min(), X[:, 1].max(), 10)
    X_, Y_ = np.meshgrid(x_, y_)

    # Evaluate every grid point in one call with a 2-D
    # (n_points, 2) array; a bare [x1, x2] sample is 1-D.
    grid_points = np.column_stack((X_.ravel(), Y_.ravel()))
    Z = np.asarray(clf.decision_function(grid_points)).reshape(X_.shape)

    levels = [-1.0, 0, 1.0]
    colors = 'k'
    linestyles = ['dashed', 'solid', 'dashed']

    ax = plt.axes()
    plt.xlabel('X1')
    plt.ylabel('X2')
    # ``labels`` is not a contour() parameter, so it is not passed.
    ax.contour(X_, Y_, Z, colors=colors,
               levels=levels, linestyles=linestyles)
    ax.scatter(X[:, 0], X[:, 1], c=y)
开发者ID:abinashpanda,项目名称:ml_tutorial,代码行数:31,代码来源:SGD_Classification.py

示例11: kernelsvm

class kernelsvm():
    """SGD classifier trained on an RBF ``General_Nystroem`` feature map."""

    def __init__(self, theta0, alpha, loss_metric):
        """Store the kernel gamma, the SGD alpha and the SGD loss name."""
        self.theta0, self.alpha, self.loss_metric = theta0, alpha, loss_metric

    def fit(self, X, y, idx_SR):
        """Fit the feature map on ``X`` and the classifier on the result.

        ``idx_SR`` is forwarded to the feature map and its length sets the
        number of components.
        """
        mapper = General_Nystroem(kernel='rbf', gamma=self.theta0, n_components=len(idx_SR))
        self.feature_map_nystroem = mapper
        mapped = mapper.fit_transform(X, idx_SR)
        print("fitting SGD")
        sgd = SGDClassifier(loss=self.loss_metric, alpha=self.alpha)
        self.clf = sgd
        sgd.fit(mapped, y)
        print("fitting SGD finished")

    def predict(self, X):
        """Return ``(predicted labels, transformed X)`` for raw input ``X``."""
        print("Predicting")
        mapped = self.feature_map_nystroem.transform(X)
        labels = self.clf.predict(mapped)
        return labels, mapped

    def decision_function(self, X):
        """Return the classifier's decision values.

        Unlike ``predict``, ``X`` must already be transformed.
        """
        scores = self.clf.decision_function(X)
        return scores

    def err_rate(self, y_true, y_pred):
        """Return ``1.0 - accuracy_score(y_true, y_pred)``."""
        return 1.0 - accuracy_score(y_true, y_pred)

    def get_params(self):
        """Return the parameters of the fitted SGD classifier."""
        params = self.clf.get_params()
        return params
开发者ID:Zheng-JIA,项目名称:kernelsubsampling,代码行数:26,代码来源:svm.py

示例12: run_SGD

def run_SGD(X, y, n_tr, n_te):
    """Fit unpenalised SGD models with hinge and log loss and print scores.

    The first ``n_tr`` rows are used for fitting and the last ``n_te``
    rows for validation; one line is printed per loss.
    """
    train_X, train_y = X[:n_tr], y[:n_tr]
    held_X, held_y = X[-n_te:], y[-n_te:]
    for loss_name in ('hinge', 'log'):
        fitted = SGDClassifier(loss=loss_name, penalty=None, n_iter=100).fit(train_X, train_y)
        train_acc = fitted.score(train_X, train_y)
        held_acc = fitted.score(held_X, held_y)
        print('Training, validation accuracy is %6.4f and %6.4f for %s loss'
              % (train_acc, held_acc, loss_name))
开发者ID:joshua924,项目名称:MachineLearningProject_Team509,代码行数:7,代码来源:train.py

示例13: stochasticGD

def stochasticGD(input_file,Output,test_size):
    """Train and evaluate a hinge-loss SGD classifier on a CSV data set.

    Column 0 of the loaded data is the target and the remaining loaded
    columns are the features.  Metrics are printed, written together with
    the per-sample predictions to
    ``<Output>Stochastic_GD_metrics_test.txt``, and a confusion matrix is
    plotted through ``plot_confusion_matrix``.

    :param input_file: path of the comma-separated input file
    :param Output: path prefix for the generated report and image
    :param test_size: forwarded to ``train_test_split``
    """
    lvltrace.lvltrace("LVLEntree dans stochasticGD split_test")
    ncol = tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:, 1:]
    y = data[:, 0]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print("%s %s" % (X_train.shape, X_test.shape))
    clf = SGDClassifier(loss="hinge", penalty="l2")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # Each metric is computed once and reused for console and report.
    print("Stochastic Gradient Descent ")
    accuracy = metrics.accuracy_score(y_test, y_pred)
    print("classification accuracy: %s" % accuracy)
    precision = metrics.precision_score(y_test, y_pred)
    print("precision: %s" % precision)
    recall = metrics.recall_score(y_test, y_pred)
    print("recall: %s" % recall)
    f1 = metrics.f1_score(y_test, y_pred)
    print("f1 score: %s" % f1)
    print("\n")

    results = Output+"Stochastic_GD_metrics_test.txt"
    # ``with`` closes the report even if a write fails.
    with open(results, "w") as report:
        report.write("Stochastic Gradient Descent estimator accuracy\n")
        report.write("Classification Accuracy Score: %f\n" % accuracy)
        report.write("Precision Score: %f\n" % precision)
        report.write("Recall Score: %f\n" % recall)
        report.write("F1 Score: %f\n" % f1)
        report.write("\n")
        report.write("True Value, Predicted Value, Iteration\n")
        # The iteration column is 1-based.
        for n, (truth, guess) in enumerate(zip(y_test, y_pred), 1):
            report.write("%f,%f,%i\n" % (truth, guess, n))

    title = "Stochastic Gradient Descent %f"%test_size
    save = Output + "Stochastic_GD_confusion_matrix"+"_%s.png"%test_size
    plot_confusion_matrix(y_test, y_pred,title,save)
开发者ID:xaviervasques,项目名称:Neuron_Morpho_Classification_ML,代码行数:33,代码来源:supervised_split_test.py

示例14: train_stochaticGradientDescent

def train_stochaticGradientDescent(X, y, loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15,
                                   fit_intercept=True, n_iter=5, shuffle=True, verbose=0,
                                   epsilon=0.1, n_jobs=1, random_state=None, learning_rate='optimal',
                                   eta0=0.0, power_t=0.5, class_weight=None, warm_start=False,
                                   average=False):
    """Build an SGDClassifier from the given settings and fit it on X, y.

    Every keyword argument is forwarded unchanged to ``SGDClassifier``
    under the same name.

    :return: whatever ``SGDClassifier.fit`` returns
    """
    settings = dict(
        loss=loss,
        penalty=penalty,
        alpha=alpha,
        l1_ratio=l1_ratio,
        fit_intercept=fit_intercept,
        n_iter=n_iter,
        shuffle=shuffle,
        verbose=verbose,
        epsilon=epsilon,
        n_jobs=n_jobs,
        random_state=random_state,
        learning_rate=learning_rate,
        eta0=eta0,
        power_t=power_t,
        class_weight=class_weight,
        warm_start=warm_start,
        average=average,
    )
    estimator = SGDClassifier(**settings)
    return estimator.fit(X, y)
开发者ID:LatencyTDH,项目名称:Pykit-Learn,代码行数:25,代码来源:classification_utils.py

示例15: SGD

def SGD(x, y):
    """Fit a default SGDClassifier on (x, y) and return its predictions on x."""
    from sklearn.linear_model import SGDClassifier

    model = SGDClassifier()
    model.fit(x, y)
    predictions = model.predict(x)
    return predictions
开发者ID:keymanesh,项目名称:Coursera_Stanford_Machine-Learning,代码行数:7,代码来源:plotData.py


注:本文中的sklearn.linear_model.SGDClassifier类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。