This article collects typical usage examples for the Python method sklearn.naive_bayes.MultinomialNB.predict_log_proba. If you are wondering how MultinomialNB.predict_log_proba is used in practice, the curated code examples below may help. You can also read further about the class this method belongs to, sklearn.naive_bayes.MultinomialNB.
The following 15 code examples of MultinomialNB.predict_log_proba are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
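Before the collected examples, here is a minimal, self-contained sketch of what predict_log_proba returns: one log-probability per class for each sample, equal (up to floating-point error) to the log of predict_proba. The toy data and variable names below are illustrative only and are not taken from any example on this page.

import numpy as np
from sklearn.naive_bayes import MultinomialNB

# Toy data: non-negative integer counts, as MultinomialNB expects.
rng = np.random.RandomState(0)
X = rng.randint(5, size=(6, 10))
y = np.array([0, 0, 1, 1, 2, 2])

clf = MultinomialNB().fit(X, y)
log_proba = clf.predict_log_proba(X[:2])  # shape (2, 3): one column per class
assert np.allclose(log_proba, np.log(clf.predict_proba(X[:2])))
print(log_proba)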
Example 1: test_mnnb
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def test_mnnb(kind):
    # Test Multinomial Naive Bayes classification.
    # This checks that MultinomialNB implements fit and predict and returns
    # correct values for a simple toy dataset.
    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)

    # Check the ability to predict the learning set.
    clf = MultinomialNB()
    assert_raises(ValueError, clf.fit, -X, y2)
    y_pred = clf.fit(X, y2).predict(X)
    assert_array_equal(y_pred, y2)

    # Verify that np.log(clf.predict_proba(X)) gives the same results as
    # clf.predict_log_proba(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)

    # Check that incremental fitting yields the same results
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])
    y_pred2 = clf2.predict(X)
    assert_array_equal(y_pred2, y2)
    y_pred_proba2 = clf2.predict_proba(X)
    y_pred_log_proba2 = clf2.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8)
    assert_array_almost_equal(y_pred_proba2, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba)

    # Partial fit on the whole data at once should be the same as fit too
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))
    y_pred3 = clf3.predict(X)
    assert_array_equal(y_pred3, y2)
    y_pred_proba3 = clf3.predict_proba(X)
    y_pred_log_proba3 = clf3.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8)
    assert_array_almost_equal(y_pred_proba3, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)
Example 2: recommend
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def recommend(twitterword):
    newpd = get_words_df()
    #newpd = pd.read_csv('twitter_bigdf_appended_cleanedtweets_averageperuser.csv')
    newpd['Tweet'] = newpd['Tweet'].map(lambda x: str(x))
    newpd['was_retweeted'] = newpd['average_retweet_threshold']
    best_alpha = 50.0
    best_min_df = 0.01
    vectorizer = CountVectorizer(min_df=best_min_df)
    x, y = make_xy(newpd, vectorizer)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y)
    clf = MultinomialNB(alpha=best_alpha).fit(xtrain, ytrain)
    probs = clf.predict_log_proba(x)[:, 0]
    prob = clf.predict_proba(x)[:, 0]
    predict = clf.predict(x)
    retweet_chance = clf.predict_proba(vectorizer.transform([twitterword]))
    answer = retweet_chance[0][1] * 100
    return answer
Example 3: return_framing_data
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def return_framing_data(training_set, output_filepath):
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(training_set.data)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
    clf = MultinomialNB(alpha=1.0, fit_prior=False).fit(X_train_tfidf, training_set.target)
    X_new_counts = count_vect.transform(frames)
    X_new_tfidf = tfidf_transformer.transform(X_new_counts)
    predicted_logs = clf.predict_log_proba(X_new_tfidf)
    return predicted_logs
Example 4: return_framing_datum
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def return_framing_datum(training_set, frame):
    '''This is hacky- fix it later'''
    frames = [frame.word_string]
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(training_set.data)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
    clf = MultinomialNB(alpha=1.0, fit_prior=False).fit(X_train_tfidf, training_set.target)
    X_new_counts = count_vect.transform(frames)
    X_new_tfidf = tfidf_transformer.transform(X_new_counts)
    predicted_logs = clf.predict_log_proba(X_new_tfidf)
    print(predicted_logs)
    return predicted_logs[0]
Example 5: write_framing_data
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def write_framing_data(training_set, output_filepath):
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(training_set.data)
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
    clf = MultinomialNB(alpha=1.0, fit_prior=False).fit(X_train_tfidf, training_set.target)
    X_new_counts = count_vect.transform(frames)
    X_new_tfidf = tfidf_transformer.transform(X_new_counts)
    predicted_logs = clf.predict_log_proba(X_new_tfidf)
    f = open(output_filepath, 'w')
    f.write('Frame Names and Order:\n' + str(frame_order) + '\n\n')
    #f.write('Frames:\n'+str(frames)+'\n\n')
    f.write('Training Set:\n' + str(training_set.target_names) + '\n\n')
    f.write('Log-Likelihoods:\n' + str(predicted_logs) + '\n\n')
    f.close()
Example 6: proc
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def proc(X, y, ids, out_file):
    # http://stackoverflow.com/questions/31421413/how-to-compute-precision-recall-accuracy-and-f1-score-for-the-multiclass-case
    logger.info("Sentiment Analysis")
    ret = dict()
    train = X.shape[0]
    pp(X.shape)
    Xtrain = X
    ytrain = y
    clf = MultinomialNB()
    clf.fit(Xtrain, ytrain)
    fo = open(out_file, 'w')
    error = 0
    for i, text in enumerate(Xtrain):
        if i % PRINT_EVERY == 0:
            logger.info("Working on %d" % i)
        y_pred = clf.predict(text)
        y_pred_proba = clf.predict_proba(text)
        y_pred_log_proba = clf.predict_log_proba(text)
        review_id = ids[i]['rid']
        funny = ids[i]['funny']
        useful = ids[i]['useful']
        cool = ids[i]['cool']
        user_id = ids[i]['user_id']
        business_id = ids[i]['business_id']
        yip = y_pred[0] - 1
        if yip < 0:
            yip = 0
        if y[i] != y_pred[0]:
            error += 1
        #print(review_id, y_pred[0], y_pred_proba[0][yip])
        line = json.dumps({'review_id': review_id, 'user_id': user_id, 'business_id': business_id, 'y': y[i], 'y_pred': y_pred[0], 'y_pred_proba': y_pred_proba[0][yip], 'y_pred_log_proba': y_pred_log_proba[0][yip], 'funny': funny, 'useful': useful, 'cool': cool})
        fo.write("%s\n" % line)
    fo.close()
    print("error:", error)
    print("ratio:", 100.0 * float(error) / float(train))
    logger.info("generated file %s" % out_file)
Example 7: test_mnnb
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def test_mnnb():
    """
    Multinomial Naive Bayes classification.
    This checks that MultinomialNB implements fit and predict and returns
    correct values for a simple toy dataset.
    """
    for X in [X2, scipy.sparse.csr_matrix(X2)]:
        # Check the ability to predict the learning set.
        clf = MultinomialNB()
        y_pred = clf.fit(X, y2).predict(X)
        assert_array_equal(y_pred, y2)

        # Verify that np.log(clf.predict_proba(X)) gives the same results as
        # clf.predict_log_proba(X)
        y_pred_proba = clf.predict_proba(X)
        y_pred_log_proba = clf.predict_log_proba(X)
        assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)
Example 8: classifyFrames
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
def classifyFrames(frame_titles, frames):
    """Train a classifier on the 20 Newsgroups training data, feed the frames to the classifier,
    and print results to a text file in the results directory.

    Arguments:
    frame_titles -- a list containing a string title for each frame in the "frames" argument
    frames -- a list containing each frame (space-delimited string of words)

    Side-Effects:
    prints a file "20newsgroupsclassifier.txt" to the /results directory
    """
    #training_set = load_files('newsgroups', shuffle=True)
    from sklearn.datasets import fetch_20newsgroups
    training_set = fetch_20newsgroups(subset='train')

    # Learn the vocabulary of the dictionary and return a count vector
    count_vect = CountVectorizer()
    X_train_counts = count_vect.fit_transform(training_set.data)

    # Use tf-idf to give low weight to very common words in training data
    tfidf_transformer = TfidfTransformer()
    X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

    # Train a multinomial naive Bayes classifier on the 20 Newsgroups data
    clf = MultinomialNB(alpha=1.0, fit_prior=False).fit(X_train_tfidf, training_set.target)

    # Vectorize and weight words in the frames
    X_new_counts = count_vect.transform(frames)
    X_new_tfidf = tfidf_transformer.transform(X_new_counts)
    predicted_logs = clf.predict_log_proba(X_new_tfidf)

    # Write output
    f = open('20newsgroupsclassifier.txt', 'w')
    f.write('Frame Names and Order:\n' + str(frame_order) + '\n\n')
    f.write('Frames:\n' + str(frames) + '\n\n')
    f.write('Training Set:\n' + str(training_set.target_names) + '\n\n')
    f.write('Log-Likelihoods:\n' + str(predicted_logs) + '\n\n')
    f.close()
Example 9: train_test_split
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
xtrain, xtest, ytrain, ytest = train_test_split(X, Y)
clf = MultinomialNB(alpha=best_alpha).fit(xtrain, ytrain)
calibration_plot(clf, xtest, ytest)
training_accuracy = clf.score(xtrain, ytrain)
test_accuracy = clf.score(xtest, ytest)
print("Accuracy on training data: %0.2f" % training_accuracy)
print("Accuracy on test data: %0.2f" % test_accuracy)
words = np.array(vectorizer.get_feature_names())
x = np.eye(xtest.shape[1])
probs = clf.predict_log_proba(x)[:, 0]
ind = np.argsort(probs)
good_words = words[ind[:10]]
bad_words = words[ind[-10:]]
good_prob = probs[ind[:10]]
bad_prob = probs[ind[-10:]]
print("Good words\t P(fresh | word)")
for w, p in zip(good_words, good_prob):
    print("%20s" % w, "%0.2f" % (1 - np.exp(p)))
print("Bad words\t P(fresh | word)")
for w, p in zip(bad_words, bad_prob):
    print("%20s" % w, "%0.2f" % (1 - np.exp(p)))
Example 10: MultinomialNB
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
X_MNB_test = X_MNB_test.tocsc()
words = np.array(vectorizer.get_feature_names())
clf_Multinomial = MultinomialNB(alpha=0.1).fit(X_MNB, y_train)
#X_identity = np.eye(X_MNB_test.shape[1])
#X_identity = X_identity.tocsc()
print(X_MNB_test.shape[1])
print(X_MNB_test.shape[0])
from scipy import sparse
X_identity = sparse.csr_matrix(sparse.identity(X_MNB_test.shape[1]).toarray())
probs = clf_Multinomial.predict_log_proba(X_identity)[:, 0]
ind = np.argsort(probs)
good_words = words[ind[:10]]
bad_words = words[ind[-10:]]
good_prob = probs[ind[:10]]
bad_prob = probs[ind[-10:]]
print("Good words\t P(fresh | word)")
for w, p in zip(good_words, good_prob):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))
print("Bad words\t P(fresh | word)")
for w, p in zip(bad_words, bad_prob):
    print("{:>20}".format(w), "{:.2f}".format(1 - np.exp(p)))
Example 11: CountVectorizer
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(training_set.data)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
clf = MultinomialNB(alpha=1.0, fit_prior=False).fit(X_train_tfidf, training_set.target)
testing_set = load_files('/home/dhrumil/Desktop/PoliticalFraming/data/testing', categories=categories, shuffle=True)
docs_test = testing_set.data
#X_new_counts = count_vect.transform(docs_test)
#X_new_tfidf = tfidf_transformer.transform(X_new_counts)
##
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words='english')
X_new_tfidf = vectorizer.fit_transform(testing_set.data)
##
predicted = clf.predict(X_new_tfidf)
predicted_logs = clf.predict_log_proba(X_new_tfidf)
print(np.mean(predicted == testing_set.target))
#f = open('classifyFiles.txt','w')
#f.write('The topics are in the following order:\n'+str(categories)+'\n\n')
#f.write('The predicted values for the training set:\n'+str(predicted)+'\n\n')
#f.write('The actual values for the training set:\n0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5 5 5 6 6 6 6 6\n\n')
#f.write('Log-Likelihoods:\n'+str(predicted_logs)+'\n\n')
#f.close()
Example 12: __init__
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
#......... some code omitted here .........
        elif self.metric == 'acc':
            best_alpha = float(mean_valid_acc.idxmax())
        elif self.metric == 'mse':
            # yet another change; just take lowest MSE
            best_alpha = float(mean_valid_mse.idxmin())
        else:
            sys.exit('Metric ' + self.metric + ' not supported')
        self.set_alpha(best_alpha)
        self.trained = False
        return valid_f1_summary, best_alpha

    def get_coefs(self, row=0):
        if self.model_type == 'default' or self.model_type == 'SVM':
            return None
        elif self.model_type == 'myMNB':
            return zip(self.column_names, self.w)
        else:
            return zip(self.column_names, self.model.coef_[row])

    def predict(self, X):
        n, p = X.shape
        if self.model_type == 'default':
            predictions = self.default * np.ones(n, dtype=int)
        elif self.model_type == 'myMNB':
            predictions = np.array((np.dot(X.toarray(), np.array(self.w)) + self.b) > 0, dtype=int)
        else:
            predictions = self.model.predict(X)
        return predictions

    def predict_probs(self, X):
        n, p = X.shape
        if self.model_type == 'LR' or self.model_type == 'MNB':
            log_probs = self.model.predict_log_proba(X)
        else:
            log_probs = np.ones(shape=[n, 1])
        return np.exp(log_probs)

    def predict_max_probs(self, X):
        n, p = X.shape
        if self.model_type == 'LR' or self.model_type == 'MNB':
            all_log_probs = self.model.predict_log_proba(X)
            #predictions = self.predict(X)
            #log_probs = np.array([all_log_probs[i, self.class_index[p]] for i, p in enumerate(predictions)])
            log_probs = np.max(all_log_probs, axis=1)
            #assert np.sum(log_probs == max_log_probs) == len(log_probs)
        else:
            log_probs = np.ones(shape=[n, 1])
        return np.exp(log_probs)

    def get_nonconformity_scores(self, X, y):
        n, p = X.shape
        if self.model_type == 'LR':
            scores = np.dot(X.toarray(), self.model.coef_[0]) + self.model.intercept_[0]
            # multiply the scores for y == 1 by -1
            scores *= -(y*2-1)
        else:
            scores = np.zeros(shape=[n, 1])
        return scores

    def get_model_size(self):
        if self.model_type == 'default':
            size = 0
Example 13: __init__
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
#......... some code omitted here .........
            valid_f1_summary.loc[dev_fold] = valid_f1s
            if verbose > 1:
                print(dev_fold, valid_f1s)
        mean_valid_f1s = valid_f1_summary.mean(axis=0)
        best_alpha = float(mean_valid_f1s.idxmax())
        self.set_alpha(best_alpha)
        self.trained = False
        return valid_f1_summary, best_alpha

    def get_coefs(self):
        if self.model_type == 'default' or self.model_type == 'SVM':
            return None
        elif self.model_type == 'myMNB':
            return zip(self.column_names, self.w)
        else:
            return zip(self.column_names, self.model.coef_[0])

    def predict(self, X):
        n, p = X.shape
        if self.model_type == 'default':
            predictions = self.default * np.ones(shape=[n, 1], dtype=int)
        elif self.model_type == 'myMNB':
            predictions = np.array((np.dot(X.toarray(), np.array(self.w)) + self.b) > 0, dtype=int)
        else:
            predictions = self.model.predict(X)
        return predictions

    def predict_p_y_eq_1(self, X):
        n, p = X.shape
        if self.model_type == 'LR' or self.model_type == 'MNB':
            log_probs = self.model.predict_log_proba(X)[:, 1]
        else:
            log_probs = np.zeros(shape=[n, 1])
        return log_probs

    def get_nonconformity_scores(self, X, y):
        n, p = X.shape
        if self.model_type == 'LR':
            scores = np.dot(X.toarray(), self.model.coef_[0]) + self.model.intercept_[0]
            # multiply the scores for y == 1 by -1
            scores *= -(y*2-1)
        else:
            scores = np.zeros(shape=[n, 1])
        return scores

    """
    def get_scores(self, X):
        n, p = X.shape
        if self.model_type == 'LR' or self.model_type == 'MNB':
            log_probs = self.model.predict_log_proba(X)
            scores = log_probs[:, 1] - log_probs[:, 0]
        else:
            scores = np.zeros(shape=[n, 1])
        return scores
    """

    def eval_f1_acc(self, X, y):
        predicted = self.predict(X)
        if np.isnan(predicted).any() or np.isnan(y).any():
Example 14: enumerate
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
m = X.shape[0]
if m < chunk:
    clf.fit(X, y)
else:
    for i, idx in enumerate(np.split(np.arange(m), range(chunk, m, chunk))):
        print('\t%s\tTraining %d chunk' % (datetime.now(), (i + 1)))
        clf.partial_fit(X[idx], y[idx], classes=list(categoryid_set))

# cv
print('*' * 80)
print('cross validating: ')
print('\t%s' % datetime.now())
X_cv = vectorizer.transform(cv['prodname'] + " " + cv['navigation'] +
                            " " + cv['merchant'] + " " + cv['brand'])
y_true = cv['categoryid'].values
jll = clf.predict_log_proba(X_cv)  # joint likelihood
y_pred = clf.classes_[np.nanargmax(jll, axis=1)]
max_proba = np.nanmax(jll, axis=1)

# trade off between accuracy and recall
# search best decision boundary for each category
def search():
    print('*' * 80)
    print('Searching: ')
    with open("remove_id", "r") as infile:
        stop_ids = [line.strip() for line in infile]
    boundary_of_category = dict()
    max_p_category = np.nanmax(jll, axis=0)  # max probability in each category
    min_p_category = np.nanmin(jll, axis=0)  # min probability in each category
    for categoryid in categoryid_set:
Example 15: handle
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict_log_proba [as alias]
#......... some code omitted here .........
        data_source=positive_data_source,
        ids_to_filter=event_ids,
        return_ids=False))
unlabeled_and_spy_data_ids = chain(
    unlabeled_data_ids, VenyooDocumentUtility.webpage_generator(
        data_source=positive_data_source,
        ids_to_filter=event_ids,
        return_ids=True))
X_train = vectorizer.fit_transform(
    chain(
        VenyooDocumentUtility.webpage_generator(
            data_source=positive_data_source,
            ids_to_exclude=event_ids),
        unlabeled_and_spy_data))
print('X_train: ', repr(X_train), '\n')

print('Create y_train vector of target values (=classes)...')
y_train = np.append(
    np.array(number_positive_without_spy_docs * [1]),
    np.array(number_unlabeled_and_spy_docs * [-1]))
print('y_train:', y_train.shape, '\n')

print('Create X_test matrix of token counts for testing...')
unlabeled_data = CrawledWebpageUtility.webpage_generator(
    data_source=unlabeled_data_source,
    exclude_positives=positive_set_extension,
    filter_domains=rn_domains,
    exclude_hand_labeled_pages=is_hand_labeled_data)[0]
unlabeled_and_spy_data = chain(
    unlabeled_data, VenyooDocumentUtility.webpage_generator(
        data_source=positive_data_source,
        ids_to_filter=event_ids,
        return_ids=False))
X_test = vectorizer.transform(unlabeled_and_spy_data)
print('X_test:', repr(X_test), '\n')

print('Create X_spy matrix to determine threshold t...')
X_spy = X_test.asformat('csr')[-len(event_ids):]

print('Train Multinomial NB classifier...')
classifier = MultinomialNB(alpha=0.1)
classifier.fit(X=X_train, y=y_train)

print('Create log_probabilities for X_test...')
X_test_log_proba = classifier.predict_log_proba(X_test)
print('Create log_probabilities for X_spy...')
X_spy_log_proba = classifier.predict_log_proba(X_spy)

print('Determine probability threshold t...')
if 0 < noise_level < 1:
    # Determine number of spy documents to ignore
    num_docs_to_ignore = int(noise_level * X_spy.shape[0])
    print(num_docs_to_ignore,
          'spy documents were labeled as noise and will be ignored...')
    # Determine number of spy documents to consider for further calculation
    num_docs_to_consider = X_spy.shape[0] - num_docs_to_ignore
    # Create the fraction of documents and determine the threshold from it
    threshold = np.sort(X_spy_log_proba.T[1])[::-1][:num_docs_to_consider].min()
else:
    threshold = X_spy_log_proba.T[1].min()
print('Threshold t =', threshold, '\n')

print('Determine reliable negatives...')
reliable_negative_ids = set()
for doc in X_test_log_proba:
    current_id = next(unlabeled_and_spy_data_ids)
    if doc[1] < threshold:
        reliable_negative_ids.add(current_id)
if not final_reliable_negative_ids:
    final_reliable_negative_ids = final_reliable_negative_ids | reliable_negative_ids
else:
    final_reliable_negative_ids = final_reliable_negative_ids & reliable_negative_ids
current_iteration += 1
iterations -= 1

# Reset all negative webpages back to 'Unlabeled'
CrawledWebpage.objects.filter(
    is_spy_reliable_negative='N').update(
    is_spy_reliable_negative='-')
print('Label reliable negatives in database...')
affected_pages = CrawledWebpage.objects.filter(
    id__in=final_reliable_negative_ids).update(
    is_spy_reliable_negative='N')
print('Done! Annotation of unlabeled data successful.')
print(affected_pages, 'documents have been annotated as reliable negatives.')