本文整理汇总了Python中spambayes.storage.open_storage函数的典型用法代码示例。如果您正苦于以下问题:Python open_storage函数的具体用法?Python open_storage怎么用?Python open_storage使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了open_storage函数的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: open
def open(filename, useDB="dbm", mode="r"):
    """Open a Hammie database file and wrap it in a Hammie instance.

    mode is the flag used when opening DBDict objects: 'c' opens
    read-write (creating the file if needed), 'r' read-only, and
    'w' read-write.
    """
    store = storage.open_storage(filename, useDB, mode)
    return Hammie(store, mode)
示例2: test_dbm_export
def test_dbm_export(self):
    """Exporting a dbm classifier must yield valid CSV holding all its data."""
    # Create a dbm classifier to export.
    bayes = DBDictClassifier(TEMP_DBM_NAME)
    # Stuff some messages in it so it's not empty.
    bayes.learn(tokenize(spam1), True)    # True -> learn as spam
    bayes.learn(tokenize(good1), False)   # False -> learn as ham
    # Save & Close.
    bayes.store()
    bayes.close()
    # Export.
    sb_dbexpimp.runExport(TEMP_DBM_NAME, "dbm", TEMP_CSV_NAME)
    # Reopen the original.
    bayes = open_storage(TEMP_DBM_NAME, "dbm")
    # Verify that the CSV holds all the original data (and, by using
    # the CSV module to open it, that it is valid CSV data).
    fp = open(TEMP_CSV_NAME, "rb")
    reader = sb_dbexpimp.csv.reader(fp)
    # First row holds the overall (nham, nspam) message counts.
    (nham, nspam) = reader.next()
    self.assertEqual(int(nham), bayes.nham)
    self.assertEqual(int(nspam), bayes.nspam)
    # Each remaining row is a (word, hamcount, spamcount) record;
    # every one must match the reopened classifier's word info.
    for (word, hamcount, spamcount) in reader:
        word = sb_dbexpimp.uunquote(word)
        self.assert_(word in bayes._wordinfokeys())
        wi = bayes._wordinfoget(word)
        self.assertEqual(int(hamcount), wi.hamcount)
        self.assertEqual(int(spamcount), wi.spamcount)
示例3: createWorkers
def createWorkers(self):
    """Using the options that were initialised in __init__ and then
    possibly overridden by the driver code, create the Bayes object,
    the Corpuses, the Trainers and so on."""
    # Trailing comma: keep the cursor on this line so a completion
    # message can follow (Python 2 print statement).
    print "Loading database...",
    if self.isTest:
        self.useDB = "pickle"
        self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
    if not hasattr(self, "DBName"):
        # No explicit name configured: fall back to the default
        # database type/name from the options.
        self.DBName, self.useDB = storage.database_type([])
    self.bayes = storage.open_storage(self.DBName, self.useDB)
    self.buildStatusStrings()

    # Don't set up the caches and training objects when running the self-test,
    # so as not to clutter the filesystem.
    if not self.isTest:
        def ensureDir(dirname):
            # Create the directory, tolerating only "already exists".
            try:
                os.mkdir(dirname)
            except OSError, e:
                if e.errno != errno.EEXIST:
                    raise

        # Create/open the Corpuses.  Use small cache sizes to avoid hogging
        # lots of memory.
        sc = get_pathname_option("Storage", "spam_cache")
        hc = get_pathname_option("Storage", "ham_cache")
        uc = get_pathname_option("Storage", "unknown_cache")
        map(ensureDir, [sc, hc, uc])
        if self.gzipCache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        # cache_expiry_days is converted to seconds here.
        age = options["Storage", "cache_expiry_days"]*24*60*60
        self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                           '[0123456789\-]*',
                                           cacheSize=20)
        self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                          '[0123456789\-]*',
                                          cacheSize=20)
        self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                              '[0123456789\-]*',
                                              cacheSize=20)

        # Given that (hopefully) users will get to the stage
        # where they do not need to do any more regular training to
        # be satisfied with spambayes' performance, we expire old
        # messages from not only the trained corpora, but the unknown
        # as well.
        self.spamCorpus.removeExpiredMessages()
        self.hamCorpus.removeExpiredMessages()
        self.unknownCorpus.removeExpiredMessages()

        # Create the Trainers.
        self.spamTrainer = storage.SpamTrainer(self.bayes)
        self.hamTrainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spamTrainer)
        self.hamCorpus.addObserver(self.hamTrainer)
示例4: open
def open(filename, useDB=True, mode="r"):
    """Open a file, returning a Hammie instance.

    If usedb is False, open as a pickle instead of a DBDict.  mode is
    used as the flag to open DBDict objects: 'c' for read-write
    (create if needed), 'r' for read-only, 'w' for read-write.
    """
    db = storage.open_storage(filename, useDB, mode)
    return Hammie(db)
示例5: create_workers
def create_workers(self):
    """Build the Bayes object, the corpora, the trainers and the stats
    manager from the options initialised in __init__ (possibly
    overridden by the driver code)."""
    if self.is_test:
        # Self-test mode uses a throwaway pickle that is never saved.
        self.use_db = "pickle"
        self.db_name = '_core_server.pickle' # This is never saved.
    if not hasattr(self, "db_name"):
        # Fall back to the configured default database type/name.
        self.db_name, self.use_db = storage.database_type([])
    self.bayes = storage.open_storage(self.db_name, self.use_db)

    # Load stats manager.
    self.stats = Stats.Stats(options,
                             spambayes.message.Message().message_info_db)
    self.build_status_strings()

    # When running the self-test, stop here: don't set up the caches
    # and training objects, so as not to clutter the filesystem.
    if self.is_test:
        return

    # Create/open the corpora, with small cache sizes to avoid
    # hogging lots of memory.
    spam_dir = get_pathname_option("Storage", "core_spam_cache")
    ham_dir = get_pathname_option("Storage", "core_ham_cache")
    unknown_dir = get_pathname_option("Storage", "core_unknown_cache")
    for directory in [spam_dir, ham_dir, unknown_dir]:
        storage.ensureDir(directory)
    if self.gzip_cache:
        factory = GzipFileMessageFactory()
    else:
        factory = FileMessageFactory()
    # cache_expiry_days, converted to seconds.
    age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
    name_pattern = '[0123456789\-]*'
    self.spamCorpus = ExpiryFileCorpus(age, factory, spam_dir,
                                       name_pattern, cacheSize=20)
    self.hamCorpus = ExpiryFileCorpus(age, factory, ham_dir,
                                      name_pattern, cacheSize=20)
    self.unknownCorpus = ExpiryFileCorpus(age, factory, unknown_dir,
                                          name_pattern, cacheSize=20)

    # Given that (hopefully) users will get to the stage where they do
    # not need any more regular training to be satisfied with
    # spambayes' performance, we expire old messages from not only the
    # trained corpora, but the unknown as well.
    for corpus in [self.spamCorpus, self.hamCorpus, self.unknownCorpus]:
        corpus.removeExpiredMessages()

    # Create the Trainers and subscribe them to their corpora.
    self.spam_trainer = storage.SpamTrainer(self.bayes)
    self.ham_trainer = storage.HamTrainer(self.bayes)
    self.spamCorpus.addObserver(self.spam_trainer)
    self.hamCorpus.addObserver(self.ham_trainer)
示例6: test_newdb
def test_newdb(self):
    """newdb() must close the current classifier and create a fresh,
    empty database, overwriting any existing one of the same name."""
    # Build a pre-existing, non-empty classifier on disk.
    existing = open_storage(TEMP_DBM_NAME, "dbm")
    existing.learn(tokenize(spam1), True)
    existing.learn(tokenize(good1), False)
    existing.store()
    existing.close()
    # Create the fresh classifier.
    self.h.newdb()
    # The handler's classifier must now be closed.
    self.assertEqual(self.h.h, None)
    # Reopening by the same name must find an empty database,
    # proving the old one was overwritten.
    fresh = open_storage(TEMP_DBM_NAME, "dbm")
    self.assertEqual(fresh.nham, 0)
    self.assertEqual(fresh.nspam, 0)
    fresh.close()
示例7: testNoDBMAvailable
def testNoDBMAvailable(self):
    """open_storage() must raise SystemExit (after printing a message
    to stderr) when no dbm implementation can be loaded."""
    import tempfile
    from spambayes.storage import open_storage

    db_name = tempfile.mktemp("nodbmtest")
    saved_load = DBDictClassifier.load
    DBDictClassifier.load = self._fail_open_best
    # Redirect sys.stderr, as open_storage() prints a msg to stderr.
    # Then it does sys.exit(), which we catch.
    saved_stderr = sys.stderr
    sys.stderr = StringIO.StringIO()
    try:
        try:
            open_storage(db_name, "dbm")
        except SystemExit:
            pass
        else:
            self.fail("expected SystemExit from open_storage() call")
    finally:
        # Restore the patched classloader and stream no matter what.
        DBDictClassifier.load = saved_load
        sys.stderr = saved_stderr
    if os.path.isfile(db_name):
        os.remove(db_name)
示例8: testNoDBMAvailable
def testNoDBMAvailable(self):
    """open_storage() must call sys.exit() when no dbm implementation
    is available.

    DBDictClassifier.load is stubbed out to simulate the failure, and
    sys.exit is replaced by self.success so the exit is recorded
    instead of killing the process.  Both patches are undone in a
    finally block, so an unexpected exception from open_storage()
    cannot leave sys.exit or DBDictClassifier.load broken for every
    later test (the original restored them only on the happy path).
    """
    import tempfile
    from spambayes.storage import open_storage
    DBDictClassifier_load = DBDictClassifier.load
    DBDictClassifier.load = self.fail_open_best
    sys_exit = sys.exit
    sys.exit = self.success
    self.succeeded = False
    db_name = tempfile.mktemp("nodbmtest")
    try:
        s = open_storage(db_name, True)
    finally:
        # Always undo the monkey-patching, even if open_storage raises.
        DBDictClassifier.load = DBDictClassifier_load
        sys.exit = sys_exit
    if not self.succeeded:
        self.fail()
    if os.path.isfile(db_name):
        os.remove(db_name)
示例9: test_merge_to_dbm
def test_merge_to_dbm(self):
    """Importing a CSV into an existing dbm database (merge mode) must
    sum the CSV's counts with the counts already on disk."""
    # Create a dbm classifier to merge with.
    bayes = DBDictClassifier(TEMP_DBM_NAME)
    # Stuff some messages in it so it's not empty.
    bayes.learn(tokenize(spam1), True)
    bayes.learn(tokenize(good1), False)
    # Save data to check against.
    original_nham = bayes.nham
    original_nspam = bayes.nspam
    original_data = {}
    for key in bayes._wordinfokeys():
        original_data[key] = bayes._wordinfoget(key)
    # Save & Close.
    bayes.store()
    bayes.close()
    # Create a CSV file to import: a header row with the message
    # counts, then one (word, hamcount, spamcount) row per word.
    nham, nspam = 3,4
    temp = open(TEMP_CSV_NAME, "wb")
    temp.write("%d,%d\n" % (nham, nspam))
    csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                "of":(1,0), "the":(1,2), "import":(3,1)}
    for word, (ham, spam) in csv_data.items():
        temp.write("%s,%s,%s\n" % (word, ham, spam))
    temp.close()
    # Third argument False selects merge mode (don't wipe the db).
    sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", False, TEMP_CSV_NAME)
    # Open the converted file and verify that it has all the data from
    # the CSV file (and by opening it, that it is a valid dbm file),
    # and the data from the original dbm database.
    bayes2 = open_storage(TEMP_DBM_NAME, "dbm")
    self.assertEqual(bayes2.nham, nham + original_nham)
    self.assertEqual(bayes2.nspam, nspam + original_nspam)
    # Check every word from both sources (Python 2: keys() is a list).
    words = original_data.keys()[:]
    words.extend(csv_data.keys())
    for word in words:
        word = sb_dbexpimp.uquote(word)
        self.assert_(word in bayes2._wordinfokeys())
        # Expected counts: CSV counts plus any pre-existing counts.
        h, s = csv_data.get(word, (0,0))
        wi = original_data.get(word, None)
        if wi:
            h += wi.hamcount
            s += wi.spamcount
        wi2 = bayes2._wordinfoget(word)
        self.assertEqual(h, wi2.hamcount)
        self.assertEqual(s, wi2.spamcount)
示例10: test_import_to_dbm
def test_import_to_dbm(self):
    """Importing a CSV file into a fresh dbm database must reproduce
    the CSV's message counts and per-word counts exactly."""
    csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                "of":(1,0), "the":(1,2), "import":(3,1)}
    # Write the CSV: a header row of counts, then one row per word.
    out = open(TEMP_CSV_NAME, "wb")
    out.write("3,4\n")
    for word, (ham, spam) in csv_data.items():
        out.write("%s,%s,%s\n" % (word, ham, spam))
    out.close()
    # Third argument True means "start from a fresh database".
    sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)
    # Opening the result proves it is a valid dbm file; then verify
    # every count from the CSV made it across.
    bayes = open_storage(TEMP_DBM_NAME, "dbm")
    self.assertEqual(bayes.nham, 3)
    self.assertEqual(bayes.nspam, 4)
    for word, (ham, spam) in csv_data.items():
        word = sb_dbexpimp.uquote(word)
        self.assert_(word in bayes._wordinfokeys())
        wi = bayes._wordinfoget(word)
        self.assertEqual(wi.hamcount, ham)
        self.assertEqual(wi.spamcount, spam)
示例11: hammer
def hammer():
    """Trains and classifies repeatedly.

    Stress-tests the classifier database by interleaving training,
    classification, periodic flushes to disk, and deliberately unsafe
    re-opens, trying to reproduce bsddb DBRunRecoveryError corruption.
    """
    global bayes
    wellFlushed = False
    for i in range(1, 1000000):
        # Train.
        isSpam = random.choice([True, False])
        train(makeMessage(isSpam), isSpam)
        # Every thousand messages or so, flush the DB to disk.
        if random.randrange(1000) == 1:
            print "Flushing."
            bayes.store()
            if i > 500:
                wellFlushed = True
        # Classify.
        isSpam = random.choice([True, False])
        prob = classify(makeMessage(isSpam))
        if i < 10 or i % 100 == 0:
            print "%6.6d: %d, %.4f" % (i, isSpam, prob)
        # Every thousand messages or so, reopen the DB without closing it.
        # The way this works will open the new instance before the existing
        # one goes away, which can cause a DBRunRecoveryError.  Versions up
        # to 1.0a5 had a bug in that did this, but people were still
        # reporting DBRunRecoveryErrors in 1.0a6, so I don't think we can
        # call it fixed.
        # We don't do this within the first few hundred messages, or before
        # the DB has been flushed, because that can give a "hamcount > nham"
        # error.  Despite this, you still see those errors.  Either I've got
        # something badly wrong, or they're the result of corrupt databases
        # that aren't caught by bsddb and turned into DBRunRecoveryErrors.
        if wellFlushed and random.randrange(1000) == 1:
            print "Re-opening."
            bayes = storage.open_storage(FILENAME, True)
示例12: usage
# NOTE(review): this fragment looks like the body of a main() function
# (the guard below calls main()) — confirm its indentation level against
# the full original file before applying.
# Parse the command line: -h help, -o option override, plus the
# database-type options consumed by storage.database_type().
try:
    opts, args = getopt.getopt(sys.argv[1:], "hd:p:o:")
except getopt.error, msg:
    usage(2, msg)
options = Options.options
for opt, arg in opts:
    if opt == "-h":
        usage(0)
    elif opt == "-o":
        options.set_from_cmdline(arg, sys.stderr)
dbname, usedb = storage.database_type(opts)
# The single positional argument is the address to listen on.
if len(args) != 1:
    usage(2, "IP:PORT not specified")
ip, port = args[0].split(":")
port = int(port)
# Open the classifier and serve it over XML-RPC forever.
bayes = storage.open_storage(dbname, usedb)
h = XMLHammie(bayes)
server = ReusableSimpleXMLRPCServer((ip, port), SimpleXMLRPCServer.SimpleXMLRPCRequestHandler)
server.register_instance(h)
server.serve_forever()
if __name__ == "__main__":
main()
示例13: train
__author__ = "Richie Hindle <[email protected]>"

# %-format template for the headers of the synthetic test messages.
headerTemplate = """To: %(To)s
From: %(From)s
Subject: %(Subject)s
Date: %(Date)s
"""

# Create a fresh bayes object to train and classify.
FILENAME = "__hammer.db"
try:
    os.remove(FILENAME)
except OSError:
    # No stale database to remove - that's fine.
    pass
bayes = storage.open_storage(FILENAME, True)
def train(text, isSpam):
    """Tokenize *text* and train the global classifier on it.

    isSpam selects whether the message counts as spam (True) or
    ham (False).
    """
    bayes.learn(tokenizer.tokenize(text), isSpam)
def classify(text):
    """Tokenize *text* and return the classifier's spam probability."""
    return bayes.spamprob(tokenizer.tokenize(text))
def makeMessage(isSpam):
示例14: open_spamdb
def open_spamdb(self, request):
    """Lazily open the spam database, wrapping it in a Hammie instance.

    The pickle lives in the same directory as the wiki's event-log
    page file and is opened with flag 'c' (create if missing);
    close_spamdb is registered to run at interpreter exit.
    """
    if self.sbayes is not None:
        return
    event_log = request.rootpage.getPagePath('event-log', isfile=1)
    db_dir = os.path.dirname(event_log)
    spam_db = os.path.join(db_dir, self.spam_db)
    store = storage.open_storage(spam_db, "pickle", 'c')
    self.sbayes = Hammie(store)
    atexit.register(self.close_spamdb)