本文整理汇总了Python中corpus.Corpus.emails方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.emails方法的具体用法？Python Corpus.emails怎么用？Python Corpus.emails使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。
在下文中一共展示了Corpus.emails方法的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import emails [as 别名]
def test(self, path):
    """Tag every email in the corpus at *path* as 'SPAM' or 'OK'.

    For each ``(fname, body)`` pair yielded by ``Corpus(path).emails()``:

    1. If the sender returned by ``find_sender(body)`` is in the sender
       blacklist, the message is tagged 'SPAM' and no scoring is done.
    2. Otherwise every distinct token from ``tokenize(body)`` is scored with
       ``Bayesian.word_spamicity``; the 15 scores farthest from the neutral
       value 0.5 are passed to ``Bayesian.bayes_pred``. The message is tagged
       'SPAM' when the prediction exceeds 0.9 and 'OK' otherwise.

    The verdict is recorded through ``self.tag_it(path, fname, tag)``.
    """
    corp = Corpus(path)
    bs = Bayesian()
    # NOTE(review): presumably this unpickles a local file -- pickle must
    # never be fed untrusted data; confirm 'sender_bl.pickle' is produced by
    # our own training step.
    sender_bl = load_pickle('sender_bl.pickle')

    for fname, body in corp.emails():
        # Blacklisted senders are spam regardless of the message content.
        if find_sender(body) in sender_bl:
            self.tag_it(path, fname, 'SPAM')
            continue

        # Deduplicate tokens *before* scoring so each word is evaluated once;
        # the [word, spamicity] list shape handed to bayes_pred is unchanged.
        spamicity_list = [
            [token, bs.word_spamicity(token)] for token in set(tokenize(body))
        ]
        # Most decisive words first: the farther from 0.5, the stronger the
        # evidence in either direction.
        spamicity_list.sort(key=lambda pair: abs(0.5 - pair[1]), reverse=True)
        prediction = bs.bayes_pred(spamicity_list[:15])  # Consider only 15 'words'

        # The blacklist was already handled above, so only the Bayesian
        # prediction decides here.
        tag = 'SPAM' if prediction > 0.9 else 'OK'
        self.tag_it(path, fname, tag)
示例2: test_corpusContainsOnlyEmails
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import emails [as 别名]
def test_corpusContainsOnlyEmails(self):
    """Check that emails() yields exactly the expected files and contents."""
    # Exercise the SUT: collect everything the generator produces.
    generated = {
        name: text for name, text in Corpus(CORPUS_DIR).emails()
    }
    # Verify the results: first the number of files, then the full mapping.
    self.assertEqual(
        len(self.expected),
        len(generated),
        msg='The emails() method did not generate all the corpus files.',
    )
    self.assertEqual(
        self.expected,
        generated,
        msg='The read file contents are not equal to the expected contents.',
    )
示例3: test
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import emails [as 别名]
def test(self, test_corpus_dir):
    """Classify every email in *test_corpus_dir* and write the verdicts.

    One line ``"<filename> <decision>"`` per email is written to
    ``!prediction.txt`` inside *test_corpus_dir*. The decision is "SPAM"
    when ``self.bayesian_combination(body)`` exceeds 0.9 or the address
    returned by ``self.get_email_adress(body)`` is in ``self.black_list``;
    otherwise it is "OK".
    """
    test_corpus = Corpus(test_corpus_dir)
    prediction_path = os.path.join(test_corpus_dir, "!prediction.txt")
    # The file is only written, never read back, so plain "w" is sufficient.
    with open(prediction_path, "w") as a_file:
        for filename, body in test_corpus.emails():
            # NOTE(review): the previous version also looked the sender up in
            # self.white_list, but both outcomes of that check produced "OK",
            # so it had no effect on the result. If a whitelist is meant to
            # override a spam verdict, that has to be implemented explicitly.
            is_spam = (
                self.bayesian_combination(body) > 0.9
                or self.get_email_adress(body) in self.black_list
            )
            decision = "SPAM" if is_spam else "OK"
            a_file.write(filename + " " + decision + "\n")
示例4: test_corpusContainsAlsoSpecialFiles
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import emails [as 别名]
def test_corpusContainsAlsoSpecialFiles(self):
    """Check emails() output when the corpus dir also holds a special file."""
    # Arrange: drop a special file next to the regular emails.
    save_file_to_corpus_dir(
        fname=SPECIAL_FILENAME, contents='fake', dirname=CORPUS_DIR)
    # Exercise the SUT: collect everything the generator produces.
    generated = {
        name: text for name, text in Corpus(CORPUS_DIR).emails()
    }
    # Verify the results against the expected mapping (count, then content).
    self.assertEqual(
        len(self.expected),
        len(generated),
        msg='The emails() method did not generate all the corpus files.',
    )
    self.assertEqual(
        self.expected,
        generated,
        msg='The read file contents are not equal to the expected contents.',
    )
示例5: set_truth
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import emails [as 别名]
def set_truth(path, corpus_dir='/Users/eygene/Desktop/spam-data-12-s75-h25/3'):
    """Write a ``!truth.txt`` into *path* labelling every email as SPAM.

    Args:
        path: Directory in which ``!truth.txt`` is created (overwritten if it
            already exists).
        corpus_dir: Directory whose emails are listed. Defaults to the
            location that used to be hard-coded, so existing single-argument
            calls behave as before.

    Each output line has the form ``"<name> SPAM"``, one per item yielded by
    ``Corpus(corpus_dir).emails()``.
    """
    # The context manager guarantees the file is flushed and closed, even if
    # reading the corpus raises part-way through.
    with open(os.path.join(path, "!truth.txt"), 'wt') as truth_file:
        corpus = Corpus(corpus_dir)
        for name, _ in corpus.emails():
            truth_file.write(name + ' ' + 'SPAM' + '\n')