当前位置: 首页>>代码示例>>Python>>正文


Python MultinomialNB.partial_fit方法代码示例

本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB.partial_fit方法的典型用法代码示例。如果您正苦于以下问题:Python MultinomialNB.partial_fit方法的具体用法?Python MultinomialNB.partial_fit怎么用?Python MultinomialNB.partial_fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.naive_bayes.MultinomialNB的用法示例。


在下文中一共展示了MultinomialNB.partial_fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Classifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class Classifier():
    """Online multi-label topic classifier backed by multinomial naive Bayes."""

    def __init__(self):
        """
        Initialize a classifier.
        """
        self.clf = MultinomialNB()


    def classify(self, docs, num_topics=5):
        """
        Classify a list of documents.

        Args:
            | docs (list)       -- the documents to classify (a list of strings)
            | num_topics (int)  -- number of top predicted topics
                                   to return for each doc.

        Returns:
            | list -- the list of lists of document topics.
        """

        # Returns a 2d array, where each row is
        # a list of probabilities for labels.
        docs_ = vectorize(docs)
        probs = self.clf.predict_proba(docs_)

        # argsort sorts the *indices* of each row instead of the actual
        # values, low to high. These indices correspond with positions
        # in clf.classes_.
        probs_sorted = probs.argsort()

        # Slice each row to keep the indices of the `num_topics`
        # highest probabilities.
        probs_top = probs_sorted[:, -num_topics:]

        # BUG FIX: the original iterated over an undefined name
        # (`top_probs`) and indexed with a second undefined name
        # (`probs_indices`), raising NameError on every call.
        # Convert the indices to the actual labels, and return.
        return [self.clf.classes_[prob_indices] for prob_indices in probs_top]


    def train(self, docs, labels):
        """
        Train the classifier with documents and labels.
        The training can be online. That is, an existing
        classifier can be updated with new training data.

        Args:
            | docs (list)       -- the documents to train on (a list of strings)
            | labels (list)     -- the labels to train on (a list of lists of strings)
        """
        docs_ = vectorize(docs)
        self.clf.partial_fit(docs_, labels)
开发者ID:keho98,项目名称:argos,代码行数:52,代码来源:classify.py

示例2: WeightedPartialFitPassiveTransferClassifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class WeightedPartialFitPassiveTransferClassifier(object):
  """Transfer-learning classifier: fit once on source-domain annotations,
  then update online with target-domain examples, each carrying a fixed
  sample weight.

  NOTE(review): Python 2 code (itertools.izip and a tuple-unpacking
  lambda below) -- it will not parse under Python 3.
  """

  def __init__(self, target_weight):
    # target_weight scales every target-domain sample in partial_fit.
    self.classifier = MultinomialNB()
    self.target_weight = target_weight
    # Project-specific feature extractor; 9 is presumably the left/right
    # context cutoff width -- TODO confirm against its definition.
    self.vectorizer = FullContextBagOfWordsLeftRightCutoff(9)

  # Train on unambiguous annotations which have a group number.
  # fit_transform fixes the vocabulary that transform() reuses below.
  def train_source(self, annotations):
    X = self.vectorizer.fit_transform(annotations)
    y = numpy.array([annotation.get_group_number() for annotation in annotations])

    self.classifier.fit(X, y)

  # Train on ambiguous annotations with the corresponding group labels.
  def train_target_online(self, annotations, labels):
    X = self.vectorizer.transform(annotations)
    y = numpy.array([Annotation.GROUP_MAPPING[label] for label in labels])

    # Positional args to partial_fit: classes=GROUP_MAPPING.values(),
    # sample_weight=weight_vector (every target sample equally weighted).
    weight_vector = [self.target_weight] * len(annotations)
    self.classifier.partial_fit(X, y, Annotation.GROUP_MAPPING.values(), weight_vector)

  def get_group_number_prob_pair(self, annotation, prob_vector):
    """Return the (group_index, probability) pair of the most probable
    group among this annotation's candidate groups."""
    # NOTE(review): get_group_number() is used here as a *list* of
    # candidate indices but as a single label in train_source -- verify.
    group_option_indices = annotation.get_group_number()
    group_option_prob = [prob_vector[group_option_index] for group_option_index in group_option_indices]
    return max(zip(group_option_indices, group_option_prob), key = lambda (index, prob): prob)
 
  def get_group_number(self, annotation, prob_vector):
    """Return only the index of the most probable candidate group."""
    group_index, _ = self.get_group_number_prob_pair(annotation, prob_vector)
    return group_index

  # tested, results for the classifier trained on source are not random
  def predict(self, annotations):
    """Predict a group number for each annotation."""
    X = self.vectorizer.transform(annotations)
    probs = self.classifier.predict_proba(X) # [n_samples, n_classes]
    return numpy.array([self.get_group_number(annotation, row)
     for row, annotation in itertools.izip(probs, annotations)])

  # tested, results for the classifier trained on source are not random
  def get_max_probability(self, annotation, prob_vector):
    """Return only the probability of the most probable candidate group."""
    _, prob = self.get_group_number_prob_pair(annotation, prob_vector)
    return prob

  def get_prob_estimates(self, annotations):
    """Return the winning-group probability for each annotation."""
    X = self.vectorizer.transform(annotations)
    probs = self.classifier.predict_proba(X)
    return numpy.array([self.get_max_probability(annotation, row)
      for row, annotation in itertools.izip(probs, annotations)])
开发者ID:martinthenext,项目名称:eth_ml,代码行数:49,代码来源:transfer.py

示例3: multinomial_bayes_sklearn

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
def multinomial_bayes_sklearn(corpus, documents_training, documents_test, words_features, smoothing):
    """
    Multinomial Naive Bayes using only the sklearn MultinomialNB class.
    Training is done in parts (one document per partial_fit call) to
    avoid memory problems.
    :param corpus: corpus object used to look up the full category list
    :param documents_training: iterable of (id, category, annotations) tuples
    :param documents_test: iterable of (id, category, annotations) tuples
    :param words_features: feature vocabulary used to vectorize documents
    :param smoothing: additive (Laplace/Lidstone) smoothing parameter alpha
    :return: (original_categories, estimated_categories) -- parallel lists
             of category *indices* for the test documents
    """

    print "-----Multinomial Bayes sklearn pure algorithm------"
    categories = util_classify.get_categories(corpus)    
    classifier = MultinomialNB(alpha=smoothing)
          
    '''
    print "Entrenando algoritmo por completo..."
    X_train_features = []
    y_train_categories = []
    ##### Entrenandolo de golpe
    for (id ,original_category, annotations) in documents_training:        
        X_train_features.append(util_classify.transform_document_in_vector(annotations,words_features,corpus)) 
        y_train_categories.append(original_category)
    
    classifier.fit(np.array(X_train_features), np.array(y_train_categories))    
    '''
    
    # Train incrementally, one document per call. Only the FIRST call to
    # partial_fit may (and must) declare the complete class list.
    print "Training algorithm in parts..."
    first = True
    for (id, original_category, annotations) in documents_training:
        if first is True:
            classifier.partial_fit(np.array(util_classify.transform_document_in_vector(annotations, words_features, corpus)), np.array([original_category]), classes=categories)
            first = False
        else:
            classifier.partial_fit(np.array(util_classify.transform_document_in_vector(annotations, words_features, corpus)), np.array([original_category]))
                      
    # Predict each test document and map both predicted and gold
    # categories to their index in `categories` for metric computation.
    print "Calculating metrics..."
    estimated_categories = []
    original_categories = []               
    
    for (id, cat_original, annotations) in documents_test:
        cat_estimated = classifier.predict(np.array((util_classify.transform_document_in_vector(annotations, words_features, corpus))))
        estimated_categories.append(categories.index(cat_estimated))
        original_categories.append(categories.index(cat_original))
    return original_categories, estimated_categories
开发者ID:itecsde,项目名称:classification,代码行数:49,代码来源:classify_methods.py

示例4: GraphemeBasedModel

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class GraphemeBasedModel(DiacriticsRestorationModel):
    """Diacritics-restoration model that classifies each grapheme from a
    fixed-size window of surrounding (character, class) pairs."""

    def __init__(self, window=5, input_classes=None):
        """
        Args:
            window: number of context graphemes on each side of the target
                (the target sits at index `window` of each example).
            input_classes: grapheme classes the model should predict;
                graphemes outside this set are passed through unchanged.
        """
        self.window = window
        self.input_classes = input_classes

    def train(self, corpus, classes=None, chunk_size=100000):
        """Train incrementally on `corpus`, an iterable of (window, target)
        pairs, calling partial_fit one chunk at a time to bound memory.

        Args:
            corpus: iterable yielding (feature-window, target-grapheme) pairs.
            classes: full list of output classes; required by the first
                partial_fit call and used to size the feature hasher.
            chunk_size: number of examples accumulated per partial_fit call.
        """
        self.vectorizer = FeatureHasher(non_negative=True,
                                        n_features=len(classes)*2*self.window,
                                        input_type='pair')
        self.clf = MultinomialNB()
        i = 0
        j = 0
        X = []
        Y = []
        for x, y in corpus:
            # Only train on graphemes whose class we are asked to restore.
            if x[self.window][1] in self.input_classes:
                X.append(x)
                Y.append(y)
                i += 1
            if i < chunk_size:
                continue

            j += 1
            click.echo("Running iteration {}".format(j))

            X = self.vectorizer.transform(X)
            self.clf.partial_fit(X, Y, classes)
            X = []
            Y = []
            i = 0

        # BUG FIX: the original dropped the final partial chunk -- any
        # examples accumulated after the last full chunk_size batch were
        # never fitted (and a corpus smaller than chunk_size trained on
        # nothing at all).
        if X:
            self.clf.partial_fit(self.vectorizer.transform(X), Y, classes)

    def restore(self, string):
        """Return `string` with diacritics restored grapheme by grapheme;
        graphemes outside input_classes are copied through unchanged."""
        out = ''
        for x, y in string_to_grapheme_corpus(string, self.window):
            if x[self.window][1] in self.input_classes:
                x = self.vectorizer.transform([x])
                out += self.clf.predict(x)[0]
            else:
                out += y
        return out
开发者ID:mrshu,项目名称:diaqres,代码行数:43,代码来源:models.py

示例5: __init__

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class GOTSpoilerChecker:
    """Tf-idf + multinomial naive Bayes spoiler detector that trains
    itself in mini-batches (via partial_fit) at construction time."""

    def __init__(self, X, Y, classes, ngram_range=(1, 3)):
        self.X = X
        self.Y = Y
        self.classes = classes
        self.ngram_range = ngram_range
        self.learn()

    def learn(self):
        """Fit the tf-idf vectorizer on the whole corpus, then feed the
        naive Bayes model one batch_size slice at a time."""
        self.tf_idf = TfidfVectorizer(ngram_range=self.ngram_range, smooth_idf=True, max_df=0.7)
        self.tf_idf.fit(self.X)
        self.nb = MultinomialNB()
        total = len(self.X)
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            features = self.tf_idf.transform(self.X[start:stop])
            self.nb.partial_fit(features, self.Y[start:stop], classes=self.classes)

    def classify(self, data):
        """Predict the label of a single document."""
        vec = self.tf_idf.transform([data])
        return self.nb.predict(vec)[0]

    def classify_list(self, data):
        """Predict labels for a list of documents."""
        return self.nb.predict(self.tf_idf.transform(data))
开发者ID:dianagastrin,项目名称:hackathon2016June,代码行数:23,代码来源:BaseClassifier.py

示例6: test_alpha_vector

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
def test_alpha_vector():
    """MultinomialNB must accept a per-feature alpha vector, validate it,
    and fold it into the smoothed feature probabilities."""
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])

    # A vector alpha with one entry per feature is a valid smoothing prior.
    nb = MultinomialNB(alpha=np.array([1, 2]))
    nb.partial_fit(X, y, classes=[0, 1])

    # Feature probabilities must reflect the pseudo-counts (alpha).
    expected_feature_prob = np.array([[1 / 2, 1 / 2], [2 / 5, 3 / 5]])
    assert_array_almost_equal(nb.feature_log_prob_,
                              np.log(expected_feature_prob))

    # Predictions follow from those smoothed counts.
    expected_proba = np.array([[5 / 9, 4 / 9], [25 / 49, 24 / 49]])
    assert_array_almost_equal(nb.predict_proba(X), expected_proba)

    # A negative alpha entry is rejected with a descriptive error.
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    m_nb = MultinomialNB(alpha=np.array([1., -0.1]))
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

    # Pseudo-counts below ALPHA_MIN are clipped up to ALPHA_MIN.
    ALPHA_MIN = 1e-10
    m_nb = MultinomialNB(alpha=np.array([ALPHA_MIN / 2, 0.5]))
    m_nb.partial_fit(X, y, classes=[0, 1])
    assert_array_almost_equal(m_nb._check_alpha(),
                              [ALPHA_MIN, 0.5],
                              decimal=12)

    # A vector whose length differs from n_features is rejected.
    expected_msg = ('alpha should be a scalar or a numpy array '
                    'with shape [n_features]')
    m_nb = MultinomialNB(alpha=np.array([1., 2., 3.]))
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:42,代码来源:test_naive_bayes.py

示例7: CombinedProbTransferClassifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class CombinedProbTransferClassifier(WeightedPartialFitPassiveTransferClassifier):
  """Transfer classifier that mixes source- and target-domain class
  probabilities with a fixed interpolation weight beta."""

  def __init__(self, beta=0.5):
    self.source_classifier = MultinomialNB()
    # fit_prior=False: the target model keeps uniform class priors.
    self.target_classifier = MultinomialNB(fit_prior=False)
    self.beta = beta
    self.vectorizer = FullContextBagOfWordsLeftRightCutoff(9)

  # Train on unambiguous annotations which have a group number.
  def train_source(self, annotations):
    X = self.vectorizer.fit_transform(annotations)
    y = numpy.array([annotation.get_group_number() for annotation in annotations])

    self.source_classifier.fit(X, y)
    # Zero sample weight: initializes the target model's class/feature
    # bookkeeping without letting source data influence its estimates.
    self.target_classifier.fit(X, y, sample_weight=0)
 
  # Train on ambiguous annotations with the corresponding group labels.
  def train_target_online(self, annotations, labels):
    X = self.vectorizer.transform(annotations)
    y = numpy.array([Annotation.GROUP_MAPPING[label] for label in labels])

    self.target_classifier.partial_fit(X, y, Annotation.GROUP_MAPPING.values())

  def _combined_proba(self, X):
    """Interpolated probabilities: beta*source + (1-beta)*target."""
    source_prob = self.source_classifier.predict_proba(X) # [n_samples, n_classes]
    target_prob = self.target_classifier.predict_proba(X)
    return self.beta*source_prob + (1-self.beta)*target_prob

  def predict(self, annotations):
    """Predict a group number per annotation from the mixed probabilities."""
    combined_prob = self._combined_proba(self.vectorizer.transform(annotations))
    return numpy.array([self.get_group_number(annotation, row)
     for row, annotation in itertools.izip(combined_prob, annotations)])

  def get_prob_estimates(self, annotations):
    """Return the winning-group mixed probability per annotation."""
    combined_prob = self._combined_proba(self.vectorizer.transform(annotations))
    return numpy.array([self.get_max_probability(annotation, row)
      for row, annotation in itertools.izip(combined_prob, annotations)])
开发者ID:martinthenext,项目名称:eth_ml,代码行数:41,代码来源:transfer.py

示例8: test_mnnb

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
def test_mnnb(kind):
    """Multinomial naive Bayes on a toy dataset: fit/predict must be
    correct, and partial_fit must reproduce fit exactly, for both dense
    and sparse input."""
    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)

    def check_proba(model):
        # predict_log_proba must equal the log of predict_proba.
        proba = model.predict_proba(X)
        log_proba = model.predict_log_proba(X)
        assert_array_almost_equal(np.log(proba), log_proba, 8)
        return proba, log_proba

    # Negative counts are rejected; the learning set must be recovered.
    clf = MultinomialNB()
    assert_raises(ValueError, clf.fit, -X, y2)
    assert_array_equal(clf.fit(X, y2).predict(X), y2)
    y_pred_proba, y_pred_log_proba = check_proba(clf)

    # Incremental fitting over three slices yields the same model.
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])
    assert_array_equal(clf2.predict(X), y2)
    proba2, log_proba2 = check_proba(clf2)
    assert_array_almost_equal(proba2, y_pred_proba)
    assert_array_almost_equal(log_proba2, y_pred_log_proba)

    # A single partial_fit over everything is equivalent to fit too.
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))
    assert_array_equal(clf3.predict(X), y2)
    proba3, log_proba3 = check_proba(clf3)
    assert_array_almost_equal(proba3, y_pred_proba)
    assert_array_almost_equal(log_proba3, y_pred_log_proba)
开发者ID:AlexisMignon,项目名称:scikit-learn,代码行数:51,代码来源:test_naive_bayes.py

示例9: trainRandomForest

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
def trainRandomForest():
    """Train two incremental classifiers on chunked Expedia booking data
    and print their scores on a held-out slice.

    NOTE(review): despite the name, no random forest is trained --
    RandomForestClassifier has no partial_fit, so an SGD log-loss model
    and a MultinomialNB are trained incrementally instead (see the
    StackOverflow link below). Returns the fitted SGDClassifier.

    NOTE(review): `components` and `feature` are module-level names
    defined outside this fragment -- confirm before reuse.
    """


    # Feature columns used both for grouping and as model input.
    columns = components + feature
    print columns
    # date_time,site_name,posa_continent,user_location_country,user_location_region,user_location_city,orig_destination_distance,user_id,is_mobile,is_package,channel,srch_ci,srch_co,srch_adults_cnt,srch_children_cnt,srch_rm_cnt,srch_destination_id,srch_destination_type_id,is_booking,cnt,hotel_continent,hotel_country,hotel_market,hotel_cluster
    # Training stream: skips the first 100k rows, which are reserved as
    # the held-out set below (both readers use the same train.csv).
    train = pd.read_csv("data/train.csv", header=0,
                        names=['date_time', 'site_name', 'posa_continent', 'user_location_user',
                               'country_location_region', 'user_location_city', 'orig_destination_distance', 'user_id',
                               'is_mobile',
                               'is_package', 'channel', 'srch_ci', 'srch_co', 'srch_adults_cnt', 'srch_children_cnt',
                               'srch_rm_cnt',
                               'srch_destination_id', 'srch_destination_type_id', 'is_booking', 'cnt',
                               'hotel_continent',
                               'hotel_country', 'hotel_market', 'hotel_cluster']
                        , parse_dates=['date_time', 'srch_ci', 'srch_co'], chunksize=100000, skiprows=100000)


    # Held-out set: the first 100k rows that training skipped.
    test =  pd.read_csv("data/train.csv", header=0,
                        names=['date_time', 'site_name', 'posa_continent', 'user_location_user',
                               'country_location_region', 'user_location_city', 'orig_destination_distance', 'user_id',
                               'is_mobile',
                               'is_package', 'channel', 'srch_ci', 'srch_co', 'srch_adults_cnt', 'srch_children_cnt',
                               'srch_rm_cnt',
                               'srch_destination_id', 'srch_destination_type_id', 'is_booking', 'cnt',
                               'hotel_continent',
                               'hotel_country', 'hotel_market', 'hotel_cluster']
                        , parse_dates=['date_time', 'srch_ci', 'srch_co'], nrows=100000)




    # Log-loss SGD gives calibrated probabilities, like the NB model.
    clf = linear_model.SGDClassifier(loss='log', penalty="elasticnet", n_iter=70,n_jobs=4)

    clf2 = MultinomialNB()

    # n_components = 2
    # ipca = IncrementalPCA(n_components=n_components, batch_size=10)

    n = 0;

    print('-' * 38)
    # The 100 hotel clusters; partial_fit needs the full class list.
    cls =  np.arange(100)
    # http://stackoverflow.com/questions/28489667/combining-random-forest-models-in-scikit-learn
    for chunk in train:
        # Aggregate bookings per feature combination within the chunk.
        agg = chunk.groupby(columns)['is_booking'].agg(['count'])
        agg.reset_index(inplace=True)

        X_train = agg[components]
        y_train = agg['hotel_cluster']
        clf.partial_fit(X_train,  y_train, classes= cls)
        clf2.partial_fit(X_train, y_train, classes= cls)
        print n
        n = n + 1

    print('')

    # Score both models on the held-out (raw, un-aggregated) slice.
    X_test = test[components]
    y_test = test['hotel_cluster']

    score = clf.score(X_test, y_test)
    print 'score SGDClassifier', score

    score = clf2.score(X_test, y_test)
    print 'score MultinomialNB', score

    return clf
开发者ID:janglada,项目名称:KaggleExpedia,代码行数:71,代码来源:pca.py

示例10: MultinomialNB

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
 clf = MultinomialNB(alpha=ALPHA)
 cv = CountVectorizer(stop_words=stop_words, min_df=2)
 cv = CountVectorizer()
 #import pdb
 #pdb.set_trace()
 x1 = cv.fit_transform(news.data[0:10]).toarray()
 y1 = news.target[0:10]
 
 x2 = cv.fit_transform(news.data[11:20]).toarray()
 y2 = news.target[11:20]
 
 x3 = cv.fit_transform(news.data[21:30]).toarray()
 y3 = news.target[21:30]
 #print X
 #print y1, y2
 #print cv.get_feature_names()
 #print news.target[0:10]
 #print news.target[0:5]
 #print news.target[5:10]
 print np.unique(news.target[0:30])
 
 clf.partial_fit(x1, y1, classes=np.unique(news.target[0:30]))
 clf.partial_fit(x2, y2)
 clf.partial_fit(x3, y3)
 
 
 
 
     
 
 print 'Done'
开发者ID:meotomit,项目名称:crawler01,代码行数:33,代码来源:Learner_1.py

示例11: float

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
    d = datetime.strptime(str(x), "%y%m%d%H")
    return [float(d.weekday()), float(d.hour)]

# Hash raw categorical string features into a fixed 2**20-dim sparse space,
# so feature indices stay consistent across chunks without a vocabulary.
fh = FeatureHasher(n_features = 2**20, input_type="string", non_negative=True)

# Train the classifier incrementally, one 50k-row CSV chunk at a time.
clf = MultinomialNB()
train = pd.read_csv("testtrain.csv", chunksize = 50000, iterator = True)
# Binary click/no-click target; partial_fit needs the full class list.
all_classes = np.array([0, 1])
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    # Expand the packed hour column into weekday ("wd") and hour ("hr").
    chunk = chunk.join(pd.DataFrame([dayhour(x) for x in chunk.hour], columns=["wd", "hr"]))
    chunk.drop(["hour"], axis=1, inplace = True)
    # Every column is hashed as a string, including the numeric ones.
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    clf.partial_fit(Xcat, y_train, classes=all_classes)
    
# Score the held-out file and append its log-loss to a running log
# (the original comment said "submission file", but no submission is
# written here -- only the log-loss line below).
usecols = cols + ["id"]
X_test = pd.read_csv("testtest.csv", usecols=usecols)
X_test = X_test.join(pd.DataFrame([dayhour(x) for x in X_test.hour], columns=["wd", "hr"]))
X_test.drop(["hour"], axis=1, inplace = True)

X_enc_test = fh.transform(np.asarray(X_test.astype(str)))

y_act = pd.read_csv("testtest.csv", usecols=['click'])
# Probability of the positive class (click == 1).
y_pred = clf.predict_proba(X_enc_test)[:, 1]

with open('logloss.txt','a') as f:
    f.write('\n'+str(log_loss(y_act, y_pred))+'\tMultinomialNB')
开发者ID:evamy,项目名称:avazu-ctr,代码行数:32,代码来源:MultinomialNB.py

示例12: CountVectorizer

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
"""
# Load the tab-separated training file into label/message columns.
messages = pandas.read_csv(FILE_TRAIN, sep='\t', quoting=csv.QUOTE_NONE,names=["label", "message"])


# Bag-of-words over lemmas, then tf-idf weighting of those counts.
# NOTE(review): split_into_lemmas is defined elsewhere in this file.
bow_transformer = CountVectorizer(analyzer=split_into_lemmas).fit(messages['message'])
messages_bow = bow_transformer.transform(messages['message'])
tfidf_transformer = TfidfTransformer().fit(messages_bow)
messages_tfidf = tfidf_transformer.transform(messages_bow)

# Full label set, required by MultinomialNB.partial_fit on its first call.
classe = ['ham', 'spam']

if 'nb_model.pkl' not in os.listdir("./"):
	print 'First trainning'
	nb = MultinomialNB()
	classe = ['ham', 'spam']
	nb.partial_fit(messages_tfidf, messages['label'],classes=classe)
	all_predictions = nb.predict(messages_tfidf)
	msg_train, msg_test, label_train, label_test = train_test_split(messages['message'], messages['label'], test_size=0.3)
	print classification_report(messages['label'], all_predictions)
	# store the spam detector to disk after training
	with open('nb_model.pkl', 'wb') as fout:
	    cPickle.dump(nb, fout)
else:
	print "Training with partial_fit"
	with open('nb_model.pkl','rb') as f:
		nb = cPickle.load(f)
	nb.partial_fit(messages_tfidf, messages['label'],classes=classe)
        all_predictions = nb.predict(messages_tfidf)
        print classification_report(messages['label'], all_predictions)
        # store the spam detector to disk after training
        with open('nb_model.pkl', 'wb') as fout:
开发者ID:Michou8,项目名称:Spam_detection,代码行数:33,代码来源:detecspam.py

示例13: enumerate

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
# Timestamp baseline for the first "Query time" measurement below.
# NOTE(review): t0, clf, engine2, all_classes, make_chunk_features and the
# losses/n_train/n_train_pos accumulators are defined outside this fragment.
tf = t0
#for irun, chunk in enumerate(pd.read_sql_query("SELECT * FROM trainSearchStream WHERE IsClick IN (0,1) ORDER BY RANDOM();", engine, chunksize=2000000)):
#for irun, chunk in enumerate(pd.read_sql_query("SELECT * FROM trainSearchStream WHERE IsClick IN (0,1);", engine, chunksize=2000000)):
for irun, chunk in enumerate(pd.read_sql_query("SELECT * FROM trainSearchRandom;", engine2, chunksize=2000000)):
    # for chunk in pd.read_sql_query("SELECT * FROM trainSearchStream", engine, chunksize=10000):
    ti = time.time()
    print "Query time: ", ti - tf
    if irun == 0:
        # The first 2M-row chunk is held out as the validation set.
        X_val, Y_val = make_chunk_features(chunk)
        tj = time.time()
        print "Make feature time: ", tj - ti
    else:
        # Every later chunk trains the model incrementally...
        X_train_temp, Y_train_temp = make_chunk_features(chunk)   
        tj = time.time()
        print "Make feature time: ", tj - ti
        clf.partial_fit(X_train_temp, Y_train_temp, classes=all_classes)
            
        n_train += len(X_train_temp)
        n_train_pos += sum(Y_train_temp)
        # ...and is then scored against the held-out first chunk.
        y_pred = clf.predict_proba(X_val.values.astype(float))
        logloss = log_loss(Y_val.values.astype(float), y_pred)
        losses.append(logloss)
        print "Logloss: ", logloss, "n_train: ", n_train, "n_train_pos: ", n_train_pos
        #s = clf.score(X_val.values.astype(float), Y_val.values.astype(float))
        #scores.append(s)
        #print "Score: ", s, "n_train: ", n_train, "n_train_pos: ", n_train_pos
    tf = time.time()
    # NOTE(review): tj was captured *before* partial_fit ran, so the
    # "Training time" printed here is actually the feature-building
    # time again -- looks like a bug; confirm before relying on it.
    print "Training time: ", tj - ti
# Final validation score after the last chunk.
y_pred = clf.predict_proba(X_val.values.astype(float))
logloss = log_loss(Y_val.values.astype(float), y_pred)
#print scores    
开发者ID:npetitclerc,项目名称:avito,代码行数:33,代码来源:try_histCTR_online.py

示例14: MultinomialNB

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
# NOTE(review): loader, testloader, trainlabelindices, trainX and
# BacktrackingSearch are defined earlier in this file, outside this view.
testlabelnames = testloader.extractLabelNames()

#dictionary { sentiment:[] event:[] time:[] } of label indices for each training example
testlabelindices = testloader.extractLabelIndices()

classifiers = {}
# NOTE(review): the message says "test data" but the classifiers below
# are fit on trainX / trainlabelindices -- confirm which is intended.
print "training naive bayes classifiers on test data"
# Train a multinomial naive bayes on each label type 
for labeltype in ['sentiment', 'event', 'time']:
	nbclassifier = MultinomialNB()
	# the trainY is a single index for the maximum confidence label in a label class
	y = trainlabelindices[labeltype]
	# list of all possible labels for nbclassifier
	indices = [ i for i in range(len(loader.labelnames[labeltype]))]
	# partial fit works when you don't use the full training set 
	nbclassifier.partial_fit(trainX, y, indices)
	classifiers[labeltype] = nbclassifier

print 'running csp on each example'
backsearch = BacktrackingSearch() 
#controls the minimum probability for a label to be considered in the csp
probabilitythreshold = .2
# controls the minimum confidence for a label to be present in the gold bit vector
confidencethreshold = .5
# gold output for evaluation for each training example
testgoldvectors = testloader.extractLabelBitVectors(confidencethreshold)

#Create a new csp for each example and assign unary potentials according to the classifier
#Solve the csp using backtracking search
#Compare the resulting assignment to the goldlabel vectors to get accuracy

示例15: __init__

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import partial_fit [as 别名]
class Classifier:
    """
    Multinomial Naive Bayes classifier.
    Provides binary classification; that is,
    labels are either 0 or 1,
    0 being negative,
    1 being positive.
    """

    def __init__(self, filepath=path.join(__location__, "classifier.pickle")):
        """
        Initialize the classifier.
        Tries to load the existing one;
        if none exists, a new one is created.
        """
        self.filepath = filepath

        # Stateless hashing vectorizer feeding a tf-idf transformer.
        hasher = HashingVectorizer(stop_words="english", non_negative=True, norm=None, binary=False)
        self.vectorizer = Pipeline((("hasher", hasher), ("tf_idf", TfidfTransformer())))

        # Try to load the existing classifier.
        self.clf = self.load()

        # If there wasn't one, create a new one.
        if not self.clf:
            self.clf = MultinomialNB(alpha=0.1)

    def train(self, docs, labels, save=True):
        """
        Updates the classifier with new training data.
        By default, saves the updated classifier as well.
        Empty `docs` is a no-op.
        """
        if docs:
            training = self.vectorizer.fit_transform(docs)
            # Third positional arg is `classes`: 0/1 binary labels.
            self.clf.partial_fit(training, labels, [0, 1])
            if save:
                self.save()

    def classify(self, docs):
        """
        Classifies a list of documents.
        Returns a list of class probabilities
        for each document.
        """
        # NOTE(review): fit_transform re-fits the tf-idf step at
        # classification time; presumably tolerated because the hasher
        # is stateless -- confirm the tf-idf weighting is intended here.
        docs_ = self.vectorizer.fit_transform(docs)
        try:
            return self.clf.predict_proba(docs_)

        # Likely because the classifier hasn't been trained yet.
        except AttributeError:
            return []

    def save(self):
        """
        Persist the classifier to the disk.
        """
        # BUG FIX: the handle was opened but never closed; a context
        # manager guarantees the pickle is flushed and the descriptor
        # released (also stops shadowing the `file` builtin).
        with open(self.filepath, "wb") as fh:
            pickle.dump(self.clf, fh)

    def load(self):
        """
        Load the classifier from disk.
        Returns None if one wasn't found.
        """
        # BUG FIX: close the handle via a context manager instead of
        # leaking it for the garbage collector to clean up.
        try:
            with open(self.filepath, "rb") as fh:
                return pickle.load(fh)
        except IOError:
            return None
开发者ID:publicscience,项目名称:brain,代码行数:71,代码来源:classifier.py


注:本文中的sklearn.naive_bayes.MultinomialNB.partial_fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。