

Python SGDClassifier.score Method Code Examples

This article collects and summarizes typical usage examples of the sklearn.linear_model.SGDClassifier.score method in Python. If you are wondering how exactly SGDClassifier.score is used, or looking for concrete examples of it in practice, the curated code samples below may help. You can also explore further usage examples of the class it belongs to, sklearn.linear_model.SGDClassifier.


The following presents 15 code examples of the SGDClassifier.score method, sorted by popularity by default.
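
Before the examples, here is a minimal, self-contained sketch of what SGDClassifier.score does: it returns the mean accuracy of the fitted classifier on the given samples and labels. The synthetic dataset and parameters below are illustrative assumptions only, not taken from any of the projects listed afterwards.

# Minimal sketch (assumed setup): score() reports mean accuracy on held-out data.
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split

# Synthetic binary classification data, purely for illustration.
X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

clf = SGDClassifier(random_state=0)  # default loss='hinge', i.e. a linear SVM
clf.fit(X_train, y_train)

# Equivalent to accuracy_score(y_test, clf.predict(X_test)).
print("Test accuracy: %.3f" % clf.score(X_test, y_test))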

Example 1: train_test_bow

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train_test_bow(ngram_order, batch_size=128, n_epoch=3):
    label_sets = ['full', 'function', '3way', 'in_out', 'man_nat']
    for label_set in label_sets:
        # need to drop unk for full/function
        if label_set in ['full', 'function']:
            df = sentences_df(labels=label_set, drop_unk=True)
        else:
            df = sentences_df(SENTENCES_CSV, labels=label_set, drop_unk=False)
        X, y, word2idx, l_enc = load_dataset(df, ngram_order=ngram_order)
        print "X shape: %s" % (X.shape,)
        print "y shape: %s" % (y.shape,)
        skf = StratifiedKFold(y, n_folds=10, shuffle=True, random_state=0)
        scores = []
        for (train, test) in skf:
            clf = None
            clf = SGDClassifier(loss='log',
                                alpha=0.001,
                                l1_ratio=0,
                                random_state=0)
            for epoch in range(n_epoch):
                X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]
                n_batches = X_train.shape[0] // batch_size
                for minibatch_idx in range(n_batches):
                    clf.partial_fit(
                        X_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        y_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        classes=np.unique(y))
                print "Epoch: %d/%d Train acc: %.4f" \
                    % (epoch+1, n_epoch, clf.score(X_train, y_train))
            fold_score = clf.score(X_test, y_test)
            print "Fold acc: %.4f" % fold_score
            scores.append(fold_score)
        print '%s label mean cv accuracy: %.4f\n' % (label_set, np.mean(scores))
Developer ID: cmward, Project: text-scene, Lines of code: 35, Source file: maxent.py

Example 2: evaluate_svm

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def evaluate_svm(alpha):
    # Note: n_iter gets switched to 1 by sklearn whenever you call partial_fit(). This initial
    # setting is for the pretesting of eta0.
    basic_svm = SGDClassifier(loss="hinge", penalty="l2", l1_ratio=0.0, random_state=31337, n_jobs=5,
                              n_iter=5, alpha=alpha)

    learning_rate_grid = [ 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7 ]
    pretest_svm = GridSearchCV(basic_svm,
                               {"learning_rate": ["constant"],
                                "eta0": learning_rate_grid}).fit(X_pretest, y_pretest)
    bottou_gamma0 = pretest_svm.best_params_["eta0"]
    basic_svm.eta0 = bottou_gamma0
    basic_svm.learning_rate = "constant"

    basic_svm = basic_svm.partial_fit(X_pretest, y_pretest, classes = np.unique(y_train))

    progressive_val = []
    train_score = []
    for dp in range(0, X_train.shape[0], batch_size):
        t = dp + n_pretest
        basic_svm.eta0 = bottou_gamma0/(1 + bottou_gamma0*alpha*t)
        X_batch = X_train[dp:dp+batch_size]
        y_batch = y_train[dp:dp+batch_size]
        progressive_val.append(basic_svm.score(X_batch, y_batch))
        basic_svm = basic_svm.partial_fit(X_batch, y_batch)
        train_score.append(basic_svm.score(X_batch, y_batch))

    scores = progressive_val[-batches_for_cv_performance:]
    return np.mean(scores), np.std(scores), basic_svm
Developer ID: nerdcha, Project: cs231n, Lines of code: 31, Source file: fit_svc.py

Example 3: run_SGD

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def run_SGD(X, y, n_tr, n_te):
  X_tr, y_tr, X_te, y_te = X[:n_tr], y[:n_tr], X[-n_te:], y[-n_te:]
  penalties = ['hinge', 'log']
  for p in penalties:
    model = SGDClassifier(loss=p, penalty=None, n_iter=100).fit(X_tr, y_tr)
    print 'Training, validation accuracy is %6.4f and %6.4f for %s loss' % \
        (model.score(X_tr, y_tr), model.score(X_te, y_te), p)
Developer ID: joshua924, Project: MachineLearningProject_Team509, Lines of code: 9, Source file: train.py

Example 4: sgc_test

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def sgc_test(X, y, weight):
    from sklearn.linear_model import SGDClassifier
    from sklearn import cross_validation
    from sklearn.metrics import confusion_matrix
    from sklearn.preprocessing import StandardScaler

    for i in range(0,1):
        X_train, X_test, y_train, y_test, weight_train, weight_test = cross_validation.train_test_split(
            X, y, weight, test_size=0.2, random_state=0)
        clf = SGDClassifier(loss="hinge", n_iter=100, n_jobs=-1, penalty="l2")
        #clf = LogisticRegression( max_iter=100)

        scaler = StandardScaler(with_mean=False)
        scaler.fit(X_train)  # Don't cheat - fit only on training data
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)  # apply same transformation to test data

        clf.fit(X_train, y_train, sample_weight=weight_train)

        y_pred = clf.predict(X_train)
        #print(confusion_matrix(y_train, y_pred))
        print(clf.score(X_train,y_train,weight_train))

        y_pred = clf.predict(X_test)

        #print(confusion_matrix(y_test, y_pred))
        print(clf.score(X_test,y_test,weight_test))
Developer ID: organization-lab, Project: weibo-predict, Lines of code: 29, Source file: regressor.py

Example 5: crossvalidation

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
 def crossvalidation(self, rawX, Y):
     trainF = self.genfeature(rawX)
     X_train, X_test, y_train, y_test = cross_validation.train_test_split(trainF, Y, test_size=0.4, random_state=0)
     clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
     print 'svc linear', clf.score(X_test, y_test),clf.coef_
     clf = SGDClassifier(loss="hinge", penalty="l2").fit(X_train,y_train)
     print 'SGDC hinge/l2',clf.score(X_test,y_test),clf.coef_
     clf = neighbors.KNeighborsClassifier(5 , weights='uniform').fit(X_train,y_train)
     print 'KNN 5/uniform',clf.score(X_test,y_test)
Developer ID: siyuqtt, Project: independent, Lines of code: 11, Source file: util.py

Example 6: create_classifier

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
    def create_classifier(self):
        DB.db.connect()
        clf = SGDClassifier( loss="modified_huber")
        labs_map = NameToIndex()

        with DB.db.transaction():
            offset = 0
            words_count = self.get_words_count()
            classes = numpy.arange(0,words_count)
            x_all = []
            y_all = []
            while True:
                print ' %d partial_fit %d'%(time(),offset)
                query = DB.Vocabulary\
                    .select(DB.Vocabulary.lv1, DB.Vocabulary.lv2)\
                    .join(DB.PcaModel, on=(DB.Vocabulary.feature == DB.PcaModel.feature)).order_by( DB.Vocabulary.feature).offset(offset).limit(1000)\
                    .tuples().iterator()
                features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
                offset += len(features)
                if len(features) == 0:
                    break

                Y = features[:,0]
                X = features[:,1:]

                labs = []
                for lab in Y:
                    labs.append(labs_map.map(lab))

                if(len(x_all)<10000):
                    x_all = x_all + X.tolist()
                    y_all = y_all + labs
                labs = numpy.array(labs)

                #clf = LinearSVC()
                #clf = OneVsRestClassifier(SVC(probability=True, kernel='linear'))
                #clf.fit(X,labs)
                clf.partial_fit(X,labs,classes)
                print clf.score(x_all,y_all)

            DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_clf").execute()
            DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_labs_map").execute()

            tr = DB.TrainingResult()
            tr.name = self.__class__.__name__+"_clf"
            tr.data = clf
            tr.save()

            tr = DB.TrainingResult()
            tr.name = self.__class__.__name__+"_labs_map"
            tr.data = labs_map
            tr.save()
Developer ID: caoym, Project: odr, Lines of code: 54, Source file: odr.py

Example 7: train_and_pickle_classifier

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train_and_pickle_classifier():
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)

    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)

    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        else:
            X_train = vect.transform(X_train)
            clf.partial_fit(X_train, y_train, classes=classes)

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)

    pickle.dump(clf, open(CLF_FILENAME, 'wb'), protocol=4)
Developer ID: jeremyn, Project: python-machine-learning-book, Lines of code: 27, Source file: vectorizer.py

Example 8: buildModel

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def buildModel(size):
	with open('Sentiment Analysis Dataset.csv', 'rb') as csvfile:
		pos_tweets =[]
		neg_tweets =[]
		spamreader = csv.reader(csvfile, delimiter=',')
		for row in spamreader:
			if row[1] == '1':
				if not (len(pos_tweets) > size):
					pos_tweets.append(_cleanTweet(row[3]))
			else:
				if not (len(neg_tweets) > size):
					neg_tweets.append(_cleanTweet(row[3]))
	y = np.concatenate((np.ones(len(pos_tweets[0:size])), np.zeros(len(neg_tweets[0:size]))))
	x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos_tweets[0:size], neg_tweets[0:size])), y, test_size=0.2)
	x_train = _cleanText(x_train)
	x_test = _cleanText(x_test)
	n_dim = 100
	#Initialize model and build vocab
	imdb_w2v = Word2Vec(size=n_dim, min_count=10)
	imdb_w2v.build_vocab(x_train)
	imdb_w2v.train(x_train)
	train_vecs = np.concatenate([buildWordVector(z, n_dim,imdb_w2v) for z in x_train])
	train_vecs = scale(train_vecs)
	#Train word2vec on test tweets
	imdb_w2v.train(x_test)
	#Build test tweet vectors then scale
	test_vecs = np.concatenate([buildWordVector(z, n_dim,imdb_w2v) for z in x_test])
	test_vecs = scale(test_vecs)
	lr = SGDClassifier(loss='log', penalty='l1')
	lr.fit(train_vecs, y_train)
	imdb_w2v.save("imdb_w2v")
	f = open("Accuracy.txt","w")
	f.write(str(lr.score(test_vecs, y_test))+" "+str(size*2))
	f.close()
Developer ID: phugiadang, Project: CSCI-4308-Open-Sources-Data-Analytics, Lines of code: 36, Source file: TweetAnalWord2Vec.py

Example 9: test_create_model

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
    def test_create_model(self):
        print("labeled sentence worked?")
        x_train = labelizeReviews(self.xTrain, 'TRAIN')
        x_test = labelizeReviews(self.xTest, 'TEST')
        model_dm = gensim.models.Doc2Vec(min_count=1, window=5, size=self.size, sample=1e-3, negative=5, workers=3)
        model_dbow = gensim.models.Doc2Vec(min_count=1, window=6, size=self.size, sample=1e-3, negative=5, dm=0, workers=3)
        sentences = x_train
        model_dm.build_vocab(sentences)
        model_dbow.build_vocab(sentences)
        # npArray = np.array(x_train)
        for epoch in range(10):
            print("Starting epoch:", str(epoch))
            # perm = np.random.permutation(npArray.shape[0])
            model_dm.train(random.sample(sentences, len(sentences)))
            model_dbow.train(random.sample(sentences, len(sentences)))
        # model_dm.train(x_train)
        train_vecs = getVecs(model_dm, x_train, self.size)
        train_vecs_dbow = getVecs(model_dbow, x_train, self.size)
        train_vecs_total = np.hstack((train_vecs, train_vecs_dbow))

        sentences = x_test
        for epoch in range(10):
            print("Starting epoch:", str(epoch))
            # perm = np.random.permutation(npArray.shape[0])
            model_dm.train(random.sample(sentences, len(sentences)))
            model_dbow.train(random.sample(sentences, len(sentences)))
        test_vecs = getVecs(model_dm, x_test, self.size)
        test_vecs_dbow = getVecs(model_dbow, x_test, self.size)
        test_vecs_total = np.hstack((test_vecs, test_vecs_dbow))
        lr = SGDClassifier(loss='log', penalty='l1')
        lr.fit(train_vecs_total, self.labelsTrain[:self.samples])

        print('Test Accuracy: %.2f'%lr.score(test_vecs_total, self.labelsTest[:self.samples]))
Developer ID: EspenAlbert, Project: sentimentAnalysisMovieReviews, Lines of code: 35, Source file: test_doc2vec.py

Example 10: run_online_classifier

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def run_online_classifier():
    vect = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)

    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)

    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        else:
            X_train = vect.transform(X_train)
            clf.partial_fit(X_train, y_train, classes=classes)

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)
Developer ID: jeremyn, Project: python-machine-learning-book, Lines of code: 28, Source file: chapter_8.py

Example 11: apply_minibatch_sgd

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def apply_minibatch_sgd(datasets, minibatch, epoch=5, cores=1, seed=1):
    ''' Applies the logistic regression sgd method

    :type datasets: list
    :param datasets: List containing training/testing data
    
    :type minibatch: int
    :param minibatch: minibatch size
        
    :type cores: int
    :param cores: Number of cores
    
    :type seed: int
    :param seed: Random seed
    '''
    print 'Applying mini-batch SGD with mini-batch size of ', minibatch
    training_X, training_y = datasets[0]
    testing_X, testing_y = datasets[1]
    print 'Shuffling training data'
    training_X, training_y = shuffle(training_X, training_y, random_state = seed)
    clf = SGDClassifier(loss="log", random_state=seed, n_iter=epoch, verbose=0, n_jobs=cores)
    classes = numpy.unique([-1, 1])
    minibatches = training_X.shape[0]/minibatch + 1
    samples = training_X.shape[0]
    for i in xrange(epoch):
        print "Epoch ", i+1
        for j in xrange(minibatches):
            clf.partial_fit(training_X[j*minibatch:min(samples,(j+1)*minibatch)], training_y[j*minibatch:min(samples,(j+1)*minibatch)], classes=classes)
        print "Accuracy on testing data:", clf.score(testing_X, testing_y)
Developer ID: uci-cbcl, Project: DeepCADD, Lines of code: 31, Source file: sklearn_CADD_sgd.py

Example 12: train

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def train():
    vect = HashingVectorizer(decode_error='ignore',
                             n_features=2**21,
                             preprocessor=None,
                             ngram_range=(1, 3),
                             tokenizer=tokenizer)
    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)
    stream_path = os.path.join(work_path, 'movie_data.csv')
    doc_stream = stream_docs(path=stream_path)

    pbar = pyprind.ProgBar(45)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
        pbar.update()

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)

    return clf
Developer ID: deluxebrain, Project: play-python-sentiment-analysis, Lines of code: 29, Source file: train.py

Example 13: SGD

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def SGD(X,Y,idealArray):
    print "now try SGD classifier"
    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(loss="hinge", penalty="l2")
    clf.fit(X,Y)
    print "clf works", clf
    #print clf.coef_

    #print clf.score(shortData,shortLabels)
    #gives 0.908 accuracy!
    #print clf.score(fullData,fullLabels)
    #gives 0.8234 accuracy
    print clf.score(X,Y)
    smallData = shortListGen(idealArray,fullData[0:1000])
    print "got it"
    print clf.score(smallData,shortLabels)
Developer ID: mattrozak, Project: 448-magic, Lines of code: 18, Source file: startup.py

Example 14: mine

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def mine():
    print("Starting")
    clf = SGDClassifier(loss='log',random_state=1,n_iter=1)
    print('Create/Load Classifier')
    doc_stream = stream_docs(path='./movie_data.csv')
    print('Fitting data')
    classes = np.array([0,1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    print('Finished Fitting')

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test,y_test))

    print('create pickle objects')
    dest = os.path.join('','pkl_objects')
    if not os.path.exists(dest):
        os.makedirs(dest)

    pickle.dump(stop, open(os.path.join(dest,'stopwords.pkl'),'wb'), protocol=4)
    pickle.dump(clf, open(os.path.join(dest,'classifier.pkl'),'wb'), protocol=4)
Developer ID: lorenzocastillo, Project: OpinionMining, Lines of code: 28, Source file: OpinionMiner.py

Example 15: SGDC_SVM_Classifier

# Required module: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import score [as alias]
def SGDC_SVM_Classifier(X_train, X_cv, X_test, Y_train,Y_cv,Y_test, Actual_DS):
    print("***************Starting SVM***************")
    t0 = time()
    clf = SGDClassifier(loss='log', penalty='l2',alpha=1e-5, n_iter=100)
    clf.fit(X_train, Y_train)
    preds = clf.predict(X_cv)
    score = clf.score(X_cv,Y_cv)

    print("{0:.2f}%".format(100 * score))
    Summary = pd.crosstab(label_enc.inverse_transform(Y_cv), label_enc.inverse_transform(preds),
                      rownames=['actual'], colnames=['preds'])
    Summary['pct'] = (Summary.divide(Summary.sum(axis=1), axis=1)).max(axis=1)*100
    print(Summary)

    #Check with log loss function
    epsilon = 1e-15
    #ll_output = log_loss_func(Y_cv, preds, epsilon)
    preds2 = clf.predict_proba(X_cv)
    ll_output2= log_loss(Y_cv, preds2, eps=1e-15, normalize=True)
    print(ll_output2)

    print("done in %0.3fs" % (time() - t0))

    preds3 = clf.predict_proba(X_test)
    #preds4 = clf.predict_proba((Actual_DS.ix[:,'feat_1':]))
    preds4 = clf.predict_proba(Actual_DS)
    print("***************Ending SVM***************")
    return pd.DataFrame(preds2),pd.DataFrame(preds3),pd.DataFrame(preds4)
Developer ID: roshankr, Project: DS_Competition, Lines of code: 30, Source file: Otto_Classification.py


Note: The sklearn.linear_model.SGDClassifier.score method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and any distribution or use should follow the license of the corresponding project. Do not reproduce without permission.