

Python SGDClassifier.partial_fit Method Code Examples

This article collects typical usage examples of the sklearn.linear_model.SGDClassifier.partial_fit method in Python. If you are wondering what SGDClassifier.partial_fit does, how to call it, or what working code looks like, the hand-picked examples below should help. You can also explore further usage examples of the containing class, sklearn.linear_model.SGDClassifier.


The following shows 15 code examples of SGDClassifier.partial_fit, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
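
Before the examples, here is a minimal, self-contained sketch of the typical partial_fit workflow (the synthetic data and all parameter values are illustrative, not taken from any of the projects below): the classifier is updated one mini-batch at a time, and the full set of class labels must be supplied on the first call.

import numpy as np
from sklearn.linear_model import SGDClassifier

# Synthetic binary-classification data, purely illustrative.
rng = np.random.RandomState(0)
X = rng.randn(1000, 20)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

# 'log_loss' in recent scikit-learn; older releases (as in the examples below) spell it loss='log'.
clf = SGDClassifier(loss='log_loss', random_state=0)
classes = np.array([0, 1])  # every class label must be declared on the first partial_fit call

batch_size = 100
for start in range(0, X.shape[0], batch_size):
    clf.partial_fit(X[start:start + batch_size],
                    y[start:start + batch_size],
                    classes=classes)

print('Training accuracy: %.3f' % clf.score(X, y))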

Example 1: train

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def train():
    vect = HashingVectorizer(decode_error='ignore',
                             n_features=2**21,
                             preprocessor=None,
                             ngram_range=(1, 3),
                             tokenizer=tokenizer)
    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)
    stream_path = os.path.join(work_path, 'movie_data.csv')
    doc_stream = stream_docs(path=stream_path)

    pbar = pyprind.ProgBar(45)
    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
        pbar.update()

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)

    return clf
Developer: deluxebrain, Project: play-python-sentiment-analysis, Lines: 29, Source: train.py

Example 2: test_multi_output_classification_partial_fit

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def test_multi_output_classification_partial_fit():
    # test if multi_target initializes correctly with base estimator and fit
    # assert predictions work as expected for predict

    sgd_linear_clf = SGDClassifier(loss='log', random_state=1)
    multi_target_linear = MultiOutputClassifier(sgd_linear_clf)

    # train the multi_target_linear and also get the predictions.
    half_index = X.shape[0] // 2
    multi_target_linear.partial_fit(
        X[:half_index], y[:half_index], classes=classes)

    first_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), first_predictions.shape)

    multi_target_linear.partial_fit(X[half_index:], y[half_index:])
    second_predictions = multi_target_linear.predict(X)
    assert_equal((n_samples, n_outputs), second_predictions.shape)

    # train the linear classification with each column and assert that
    # predictions are equal after first partial_fit and second partial_fit
    for i in range(3):
        # create a clone with the same state
        sgd_linear_clf = clone(sgd_linear_clf)
        sgd_linear_clf.partial_fit(
            X[:half_index], y[:half_index, i], classes=classes[i])
        assert_array_equal(sgd_linear_clf.predict(X), first_predictions[:, i])
        sgd_linear_clf.partial_fit(X[half_index:], y[half_index:, i])
        assert_array_equal(sgd_linear_clf.predict(X), second_predictions[:, i])
Developer: MechCoder, Project: scikit-learn, Lines: 31, Source: test_multioutput.py

Example 3: train_and_pickle_classifier

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def train_and_pickle_classifier():
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)

    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)

    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        else:
            X_train = vect.transform(X_train)
            clf.partial_fit(X_train, y_train, classes=classes)

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)

    pickle.dump(clf, open(CLF_FILENAME, 'wb'), protocol=4)
Developer: jeremyn, Project: python-machine-learning-book, Lines: 27, Source: vectorizer.py

Example 4: __init__

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
class LightModel:
    def __init__(self,learningRate, numEpochs, ppenalty="l1", mustShuffle=True):
        #Init scikit models
        self.Classifier = SGDClassifier(penalty=ppenalty, loss='log', alpha=learningRate, n_iter = numEpochs, shuffle=mustShuffle)
    def train(self, gen,  v=False):
        i = 0
        for x, y in gen: #For each batch
            self.Classifier.partial_fit(x, y, [0,1])
            i += len(x)
            if v : print(str(datetime.now())[:-7] , "example:", i)
            
    def test(self, gen,  v=False):

        #init target and prediction arrays
        ytot = np.array([])
        ptot = np.array([])
        #Get prediction for each batch
        i = 0
        for x,y in gen:
            p = self.Classifier.predict_proba(x)
            p = p.T[1].T #Keep column corresponding to probability of class 1
            #Stack target and prediction for later analysis
            ytot = np.hstack((ytot, y)) 
            ptot = np.hstack((ptot, p))
            i += y.shape[0]
            if v : print(str(datetime.now())[:-7] , "example:", i)
        if v: print("Score:", self.score(ytot, ptot))
        
        return (ytot, ptot)
    def score(self, target, prediction):
        return llfun(target, prediction)
Developer: EtienneDesticourt, Project: Kaggle-Avazu, Lines: 33, Source: LightModel.py

Example 5: mine

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def mine():
    print("Starting")
    clf = SGDClassifier(loss='log',random_state=1,n_iter=1)
    print('Create/Load Classifier')
    doc_stream = stream_docs(path='./movie_data.csv')
    print('Fitting data')
    classes = np.array([0,1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if not X_train:
            break
        X_train = vect.transform(X_train)
        clf.partial_fit(X_train, y_train, classes=classes)
    print('Finished Fitting')

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print('Accuracy: %.3f' % clf.score(X_test,y_test))

    print('create pickle objects')
    dest = os.path.join('','pkl_objects')
    if not os.path.exists(dest):
        os.makedirs(dest)

    pickle.dump(stop, open(os.path.join(dest,'stopwords.pkl'),'wb'), protocol=4)
    pickle.dump(clf, open(os.path.join(dest,'classifier.pkl'),'wb'), protocol=4)
Developer: lorenzocastillo, Project: OpinionMining, Lines: 28, Source: OpinionMiner.py

Example 6: apply_minibatch_sgd

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def apply_minibatch_sgd(datasets, minibatch, epoch=5, cores=1, seed=1):
    ''' Applies the logistic regression sgd method

    :type datasets: list
    :param datasets: List containing training/testing data
    
    :type minibatch: int
    :param minibatch: minibatch size
        
    :type cores: int
    :param cores: Number of cores
    
    :type seed: int
    :param seed: Random seed
    '''
    print 'Applying mini-batch SGD with mini-batch size of ', minibatch
    training_X, training_y = datasets[0]
    testing_X, testing_y = datasets[1]
    print 'Shuffling training data'
    training_X, training_y = shuffle(training_X, training_y, random_state = seed)
    clf = SGDClassifier(loss="log", random_state=seed, n_iter=epoch, verbose=0, n_jobs=cores)
    classes = numpy.unique([-1, 1])
    minibatches = training_X.shape[0]/minibatch + 1
    samples = training_X.shape[0]
    for i in xrange(epoch):
        print "Epoch ", i+1
        for j in xrange(minibatches):
            clf.partial_fit(training_X[j*minibatch:min(samples,(j+1)*minibatch)], training_y[j*minibatch:min(samples,(j+1)*minibatch)], classes=classes)
        print "Accuracy on testing data:", clf.score(testing_X, testing_y)
Developer: uci-cbcl, Project: DeepCADD, Lines: 31, Source: sklearn_CADD_sgd.py

Example 7: test_transformer

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def test_transformer(transformer, data_set, configuration):

    clf = SGDClassifier(alpha=0.005)
    samples = []
    labels = range(10)
    for epoch in range(configuration.hyper_parameters.epochs):
        for index, sample in enumerate(transformer.compute_outputs(data_set.trainset[0], data_set.trainset[1], 1)):

            samples.append(sample.reshape((1, sample.shape[0])))
            if index % 10 == 9:
                clf.partial_fit(samples, labels, labels)
                samples = []
                gc.collect()

    error = 0
    count = 0
    test_predictions = []
    for index, sample in enumerate(transformer.compute_outputs(data_set.testset[0], data_set.testset[1], 1)):
        prediction = clf.predict(sample)
        if not prediction == index % 10:
            error += 1

        count += 1
        test_predictions.append(prediction)

    OutputLog().write('test predictions weight: {0}'.format(test_predictions))

    OutputLog().write('\nerror: %f%%\n' % error)
Developer: aviveise, Project: double_encoder, Lines: 30, Source: classifier.py

Example 8: run_online_classifier

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def run_online_classifier():
    vect = HashingVectorizer(
        decode_error='ignore',
        n_features=2**21,
        preprocessor=None,
        tokenizer=tokenizer_streaming,
    )
    clf = SGDClassifier(loss='log', random_state=1, n_iter=1)

    csv_filename = os.path.join('datasets', 'movie_data.csv')
    doc_stream = stream_docs(path=csv_filename)

    classes = np.array([0, 1])
    for _ in range(45):
        X_train, y_train = get_minibatch(doc_stream, size=1000)
        if X_train is None:
            break
        else:
            X_train = vect.transform(X_train)
            clf.partial_fit(X_train, y_train, classes=classes)

    X_test, y_test = get_minibatch(doc_stream, size=5000)
    X_test = vect.transform(X_test)
    print("Test accuracy: %.3f" % clf.score(X_test, y_test))

    clf = clf.partial_fit(X_test, y_test)
Developer: jeremyn, Project: python-machine-learning-book, Lines: 28, Source: chapter_8.py

Example 9: evaluate_svm

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def evaluate_svm(alpha):
    # Note: n_iter gets switched to 1 by sklearn whenever you call partial_fit(). This initial
    # setting is for the pretesting of eta0.
    basic_svm = SGDClassifier(loss="hinge", penalty="l2", l1_ratio=0.0, random_state=31337, n_jobs=5,
                              n_iter=5, alpha=alpha)

    learning_rate_grid = [ 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7 ]
    pretest_svm = GridSearchCV(basic_svm,
                               {"learning_rate": ["constant"],
                                "eta0": learning_rate_grid}).fit(X_pretest, y_pretest)
    bottou_gamma0 = pretest_svm.best_params_["eta0"]
    basic_svm.eta0 = bottou_gamma0
    basic_svm.learning_rate = "constant"

    basic_svm = basic_svm.partial_fit(X_pretest, y_pretest, classes = np.unique(y_train))

    progressive_val = []
    train_score = []
    for dp in range(0, X_train.shape[0], batch_size):
        t = dp + n_pretest
        basic_svm.eta0 = bottou_gamma0/(1 + bottou_gamma0*alpha*t)
        X_batch = X_train[dp:dp+batch_size]
        y_batch = y_train[dp:dp+batch_size]
        progressive_val.append(basic_svm.score(X_batch, y_batch))
        basic_svm = basic_svm.partial_fit(X_batch, y_batch)
        train_score.append(basic_svm.score(X_batch, y_batch))

    scores = progressive_val[-batches_for_cv_performance:]
    return np.mean(scores), np.std(scores), basic_svm
Developer: nerdcha, Project: cs231n, Lines: 31, Source: fit_svc.py

Example 10: SGDRanker

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
class SGDRanker(BaseEstimator):

    """ Ranking predictor using stochastic gradient descent

    TODO:
    -allow configurable parameters for classifier
    -seed random state
    """

    def __init__(self, seconds=10):
        self.clf = SGDClassifier(loss='hinge')
        self.clf.fit_intercept = False
        self.clf.classes_ = np.array([-1, 1])
        self.seconds = seconds

    def fit(self, X, y):
        rows = X.shape[0]
        start_time = time.time()
        for i in itertools.count():
            if time.time() - start_time > self.seconds:
                return self
            idx1 = random.randint(0, rows - 1)
            idx2 = random.randint(0, rows - 1)
            y1, y2 = y[idx1], y[idx2]
            if y1 == y2:
                continue
            self.clf.partial_fit(X[idx1] - X[idx2], np.sign(y1 - y2))

    def predict(self, X):
        return np.dot(X, self.clf.coef_.T)
Developer: diogo149, Project: BooMLet, Lines: 32, Source: ranking.py

Example 11: train_test_bow

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def train_test_bow(ngram_order, batch_size=128, n_epoch=3):
    label_sets = ['full', 'function', '3way', 'in_out', 'man_nat']
    for label_set in label_sets:
        # need to drop unk for full/function
        if label_set in ['full', 'function']:
            df = sentences_df(labels=label_set, drop_unk=True)
        else:
            df = sentences_df(SENTENCES_CSV, labels=label_set, drop_unk=False)
        X, y, word2idx, l_enc = load_dataset(df, ngram_order=ngram_order)
        print "X shape: %s" % (X.shape,)
        print "y shape: %s" % (y.shape,)
        skf = StratifiedKFold(y, n_folds=10, shuffle=True, random_state=0)
        scores = []
        for (train, test) in skf:
            clf = None
            clf = SGDClassifier(loss='log',
                                alpha=0.001,
                                l1_ratio=0,
                                random_state=0)
            for epoch in range(n_epoch):
                X_train, y_train, X_test, y_test = X[train], y[train], X[test], y[test]
                n_batches = X_train.shape[0] // batch_size
                for minibatch_idx in range(n_batches):
                    clf.partial_fit(
                        X_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        y_train[minibatch_idx * batch_size : (minibatch_idx+1) * batch_size],
                        classes=np.unique(y))
                print "Epoch: %d/%d Train acc: %.4f" \
                    % (epoch+1, n_epoch, clf.score(X_train, y_train))
            fold_score = clf.score(X_test, y_test)
            print "Fold acc: %.4f" % fold_score
            scores.append(fold_score)
        print '%s label mean cv accuracy: %.4f\n' % (label_set, np.mean(scores))
Developer: cmward, Project: text-scene, Lines: 35, Source: maxent.py

Example 12: train

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def train():
    model = SGDClassifier()
    for batch_no, batch in enumerate(db.mini_batches(100)):
        X, y = vectorize_batch(batch)
        model.partial_fit(X, y)
        if sampling and batch_no == 10:
            break
    return model
Developer: asharma567, Project: insider-model, Lines: 10, Source: train.py

Example 13: create_classifier

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
    def create_classifier(self):
        DB.db.connect()
        clf = SGDClassifier( loss="modified_huber")
        labs_map = NameToIndex()

        with DB.db.transaction():
            offset = 0
            words_count = self.get_words_count()
            classes = numpy.arange(0,words_count)
            x_all = []
            y_all = []
            while True:
                print ' %d partial_fit %d'%(time(),offset)
                query = DB.Vocabulary\
                    .select(DB.Vocabulary.lv1, DB.Vocabulary.lv2)\
                    .join(DB.PcaModel, on=(DB.Vocabulary.feature == DB.PcaModel.feature)).order_by( DB.Vocabulary.feature).offset(offset).limit(1000)\
                    .tuples().iterator()
                features = numpy.array(map(lambda x:[x[0]]+list(x[1]),query))
                offset += len(features)
                if len(features) == 0:
                    break

                Y = features[:,0]
                X = features[:,1:]

                labs = []
                for lab in Y:
                    labs.append(labs_map.map(lab))

                if(len(x_all)<10000):
                    x_all = x_all + X.tolist()
                    y_all = y_all + labs
                labs = numpy.array(labs)

                #clf = LinearSVC()
                #clf = OneVsRestClassifier(SVC(probability=True, kernel='linear'))
                #clf.fit(X,labs)
                clf.partial_fit(X,labs,classes)
                print clf.score(x_all,y_all)

            DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_clf").execute()
            DB.TrainingResult.delete().where(DB.TrainingResult.name == self.__class__.__name__+"_labs_map").execute()

            tr = DB.TrainingResult()
            tr.name = self.__class__.__name__+"_clf"
            tr.data = clf
            tr.save()

            tr = DB.TrainingResult()
            tr.name = self.__class__.__name__+"_labs_map"
            tr.data = labs_map
            tr.save()
Developer: caoym, Project: odr, Lines: 54, Source: odr.py

Example 14: chi_feature_select

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def chi_feature_select(train_file, test_file):

    lines = read_text_src(train_file)
    lines = [x for x in lines if len(x)>1]
    X_train = [line[1] for line in lines]
    y_train = [line[0] for line in lines]

    lines = read_text_src(test_file)
    lines = [x for x in lines if len(x) > 1]
    X_test = [line[1] for line in lines]
    y_test = [line[0] for line in lines]

    vectorizer = TfidfVectorizer(tokenizer=zh_tokenize)#ngram_range=(1,2)
    X_train = vectorizer.fit_transform(X_train)
    print X_train.shape

    X_test = vectorizer.transform(X_test)

    # word = vectorizer.get_feature_names()


    # N = X_train.shape[1]
    # ch2 = SelectKBest(chi2, k=int(N*0.2)) #.fit_transform(X, y)
    #
    #
    # X_train = ch2.fit_transform(X_train, y_train)
    # X_test = ch2.transform(X_test)

    # feature_names = [word[i] for i
    #                  in ch2.get_support(indices=True)]
    #

    # for i in feature_names:
    #     print i.encode('utf-8')
    # feature_names = np.asarray(feature_names)
    # print feature_names
    # clf = LinearSVC(penalty="l1", dual=False, tol=1e-3)

    # clf.fit(X_train, y_train)
    clf = SGDClassifier(loss="log", penalty='l1')
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    prob = clf.predict_proba(X_test[0])
    print prob
    X=["市场经济复苏,互联网公司蓬勃发展","世纪大战终于开启,勇士引得第73胜"]
    Y=['1','0']
    X=vectorizer.transform(X)
    clf.partial_fit(X,Y, classes=['0','1'])
    tmpx=['暴风科技股价大跌',"世纪大战终于开启,勇士引得第73胜"]
    tmpX=vectorizer.transform(tmpx)
    pred = clf.predict(tmpX)
    print pred
Developer: actlea, Project: TopicalCrawler, Lines: 54, Source: build_dict.py

Example 15: main

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import partial_fit [as alias]
def main():
    # Get training and model filenames
    with open('model_metadata.json') as f:
        config = json.load(f)

    CLASSES = [float(x) for x in config['classes']]
    model_filename = config['modelFilename']
    NUM_BITS_FOR_HASHING = config['numBitsForHashing']
    train_filename = config['trainFilename']
    sklearn_version_expected = config['sklearnVersion']

    # If sklearn version is wrong, exit without training
    if float(sklearn.__version__) != float(sklearn_version_expected):
        print "Wrong sklearn version"
        sys.exit(0)

    with open(train_filename) as f:
        lines = (tuple(line.rstrip('\n').split('\t')) for line in f)
        parsed_lines = ((line[1:], float(line[0])) for line in lines)

        # Parse header and get feature names for namespacing
        header = next(lines)
        FEATURE_NAMES = tuple(header[1:])

        # Build pipeline
        pre_processing_pipeline = make_pre_processing_pipeline(
            feature_names=FEATURE_NAMES,
            num_bits_for_hashing=NUM_BITS_FOR_HASHING
        )

        # Instantiate classifier
        # (a logistic regression model with Stochastic Gradient Descent)
        clf = SGDClassifier(loss='log')

        # Train model in mini-batches
        batch_size = 8000

        for rows, labels in batched_lines(batch_size, parsed_lines):
            processed_rows = pre_processing_pipeline.fit_transform(rows)
            clf.partial_fit(processed_rows, labels, classes=CLASSES)

        print clf

        # Save model
        joblib.dump(clf, model_filename)

        # Reload just to make sure it serializes and de- properly
        joblib.load(model_filename)
Developer: asafcombo, Project: adtech-dash-py, Lines: 50, Source: classifier.py


Note: The sklearn.linear_model.SGDClassifier.partial_fit examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult the license of the corresponding project before distributing or using the code; do not reproduce without permission.