

Python storage.open_storage Function Code Examples

This article collects typical usage examples of the Python function spambayes.storage.open_storage. If you are wondering what open_storage does, how to call it, or what it looks like in real code, the selected examples below should help.


The 14 code examples of open_storage shown below are sorted by popularity by default.
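Before the examples themselves, here is a minimal sketch of the call pattern they all share: open a store with open_storage, train it, then persist it. The file name "demo.pickle" and the token lists are invented for illustration; the learn/nham/nspam/store/close calls mirror what the examples below do.

# Minimal sketch (not taken from any single project below).
# "demo.pickle" and the token lists are made-up illustrations.
from spambayes import storage

bayes = storage.open_storage("demo.pickle", "pickle", "c")  # 'c': create if missing
bayes.learn(["free", "offer", "winner"], True)              # train one message as spam
bayes.learn(["meeting", "agenda", "minutes"], False)        # train one message as ham
print bayes.nspam, bayes.nham                               # trained message counts
bayes.store()                                               # flush to disk
bayes.close()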

Example 1: open

def open(filename, useDB="dbm", mode="r"):
    """Open a file, returning a Hammie instance.

    mode is used as the flag to open DBDict objects.  'c' for read-write
    (create if needed), 'r' for read-only, 'w' for read-write.
    """
    return Hammie(storage.open_storage(filename, useDB, mode), mode)
Author: ems316, Project: Active-Machine-Unlearning, Lines: 7, Source: hammie.py
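As a hedged illustration of the mode flags documented in the docstring above: assuming this hammie.py is importable as the spambayes.hammie module (the file name matches it), the wrapper could be called as follows; the database file name is made up. Example 4 below appears to come from an older revision where useDB was still a boolean and the mode flag was not forwarded to the Hammie constructor.

# Hypothetical use of the wrapper above; module path and file name are
# assumptions, not taken from the project.
from spambayes import hammie

h = hammie.open("hammie.db", "dbm", "c")    # read-write, create if needed
# hammie.open("hammie.db", "dbm", "r")      # same store, opened read-only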

Example 2: test_dbm_export

 def test_dbm_export(self):
     # Create a dbm classifier to export.
     bayes = DBDictClassifier(TEMP_DBM_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save & Close.
     bayes.store()
     bayes.close()
     # Export.
     sb_dbexpimp.runExport(TEMP_DBM_NAME, "dbm", TEMP_CSV_NAME)
     # Reopen the original.
     bayes = open_storage(TEMP_DBM_NAME, "dbm")
     # Verify that the CSV holds all the original data (and, by using
     # the CSV module to open it, that it is valid CSV data).
     fp = open(TEMP_CSV_NAME, "rb")
     reader = sb_dbexpimp.csv.reader(fp)
     (nham, nspam) = reader.next()
     self.assertEqual(int(nham), bayes.nham)
     self.assertEqual(int(nspam), bayes.nspam)
     for (word, hamcount, spamcount) in reader:
         word = sb_dbexpimp.uunquote(word)
         self.assert_(word in bayes._wordinfokeys())
         wi = bayes._wordinfoget(word)
         self.assertEqual(int(hamcount), wi.hamcount)
         self.assertEqual(int(spamcount), wi.spamcount)
Author: bloggse, Project: spambayes-lite, Lines: 26, Source: test_sb_dbexpimp.py

Example 3: createWorkers

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        
        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            def ensureDir(dirname):
                try:
                    os.mkdir(dirname)
                except OSError, e:
                    if e.errno != errno.EEXIST:
                        raise

            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            map(ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
Author: Xodarap, Project: Eipi, Lines: 59, Source: sb_server.py

Example 4: open

def open(filename, useDB=True, mode="r"):
    """Open a file, returning a Hammie instance.

    If usedb is False, open as a pickle instead of a DBDict.  mode is
    used as the flag to open DBDict objects.  'c' for read-write (create
    if needed), 'r' for read-only, 'w' for read-write.
    """
    return Hammie(storage.open_storage(filename, useDB, mode))
Author: ArildF, Project: rogie, Lines: 9, Source: hammie.py

Example 5: create_workers

    def create_workers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        self.stats = Stats.Stats(options,
                                 spambayes.message.Message().message_info_db)

        self.build_status_strings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.is_test:
            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "core_spam_cache")
            hc = get_pathname_option("Storage", "core_ham_cache")
            uc = get_pathname_option("Storage", "core_unknown_cache")
            for d in [sc, hc, uc]:
                storage.ensureDir(d)
            if self.gzip_cache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spam_trainer = storage.SpamTrainer(self.bayes)
            self.ham_trainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spam_trainer)
            self.hamCorpus.addObserver(self.ham_trainer)
Author: dbrandt, Project: spambayes-lite, Lines: 56, Source: CoreUI.py
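Examples 3 and 5 are two generations of the same bootstrap logic: use a throwaway pickle when self-testing, otherwise fall back to storage.database_type([]) for the configured name and type, then hand both to open_storage. Stripped of the corpus and trainer wiring, the shared idea looks roughly like the hypothetical helper below (the function name and defaults are assumptions, not project code).

# Sketch of the bootstrap pattern shared by Examples 3 and 5; not a drop-in
# replacement for either method, and the helper name is invented.
from spambayes import storage

def load_bayes(db_name=None, use_db=None, is_test=False):
    if is_test:
        # A throwaway pickle so the self-test never touches the real database.
        db_name, use_db = "_selftest.pickle", "pickle"
    elif db_name is None:
        # Let spambayes pick the configured database name and type.
        db_name, use_db = storage.database_type([])
    return storage.open_storage(db_name, use_db)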

Example 6: test_newdb

    def test_newdb(self):
        # Create an existing classifier.
        b = open_storage(TEMP_DBM_NAME, "dbm")
        b.learn(tokenize(spam1), True)
        b.learn(tokenize(good1), False)
        b.store()
        b.close()

        # Create the fresh classifier.        
        self.h.newdb()
        
        # Verify that the classifier isn't open.
        self.assertEqual(self.h.h, None)

        # Verify that any existing classifier with the same name
        # is overwritten.
        b = open_storage(TEMP_DBM_NAME, "dbm")
        self.assertEqual(b.nham, 0)
        self.assertEqual(b.nspam, 0)
        b.close()
Author: Xodarap, Project: Eipi, Lines: 20, Source: test_sb_filter.py

Example 7: testNoDBMAvailable

    def testNoDBMAvailable(self):
        import tempfile
        from spambayes.storage import open_storage

        db_name = tempfile.mktemp("nodbmtest")
        DBDictClassifier_load = DBDictClassifier.load
        DBDictClassifier.load = self._fail_open_best
        # Redirect sys.stderr, as open_storage() prints a msg to stderr.
        # Then it does sys.exit(), which we catch.
        sys_stderr = sys.stderr
        sys.stderr = StringIO.StringIO()
        try:
            try:
                open_storage(db_name, "dbm")
            except SystemExit:
                pass
            else:
                self.fail("expected SystemExit from open_storage() call")
        finally:
            DBDictClassifier.load = DBDictClassifier_load
            sys.stderr = sys_stderr

        if os.path.isfile(db_name):
            os.remove(db_name)
Author: Xodarap, Project: Eipi, Lines: 24, Source: test_storage.py
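This test (and the older variant in Example 8 below) documents a behaviour worth keeping in mind when calling open_storage directly: if no dbm implementation is available, it prints a message to stderr and calls sys.exit() rather than raising an ordinary exception. A caller that prefers to degrade gracefully could catch the resulting SystemExit; the sketch below is one possible fallback, and the file names are hypothetical.

# Sketch: fall back to a pickle store if the dbm store cannot be opened.
# As the test above shows, open_storage() exits on failure, so SystemExit
# is the only thing there is to catch.  File names are made up.
from spambayes import storage

try:
    bayes = storage.open_storage("spam.db", "dbm")
except SystemExit:
    bayes = storage.open_storage("spam.pickle", "pickle")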

Example 8: testNoDBMAvailable

 def testNoDBMAvailable(self):
     import tempfile
     from spambayes.storage import open_storage
     DBDictClassifier_load = DBDictClassifier.load
     DBDictClassifier.load = self.fail_open_best
     sys_exit = sys.exit
     sys.exit = self.success
     self.succeeded = False
     db_name = tempfile.mktemp("nodbmtest")
     s = open_storage(db_name, True)
     DBDictClassifier.load = DBDictClassifier_load
     sys.exit = sys_exit
     if not self.succeeded:
         self.fail()
     if os.path.isfile(db_name):
         os.remove(db_name)
Author: ArildF, Project: rogie, Lines: 16, Source: test_storage.py

Example 9: test_merge_to_dbm

 def test_merge_to_dbm(self):
     # Create a dbm classifier to merge with.
     bayes = DBDictClassifier(TEMP_DBM_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save data to check against.
     original_nham = bayes.nham
     original_nspam = bayes.nspam
     original_data = {}
     for key in bayes._wordinfokeys():
         original_data[key] = bayes._wordinfoget(key)
     # Save & Close.
     bayes.store()
     bayes.close()
     # Create a CSV file to import.
     nham, nspam = 3,4
     temp = open(TEMP_CSV_NAME, "wb")
     temp.write("%d,%d\n" % (nham, nspam))
     csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                 "of":(1,0), "the":(1,2), "import":(3,1)}
     for word, (ham, spam) in csv_data.items():
         temp.write("%s,%s,%s\n" % (word, ham, spam))
     temp.close()
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", False, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file),
     # and the data from the original dbm database.
     bayes2 = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes2.nham, nham + original_nham)
     self.assertEqual(bayes2.nspam, nspam + original_nspam)
     words = original_data.keys()[:]
     words.extend(csv_data.keys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes2._wordinfokeys())
         h, s = csv_data.get(word, (0,0))
         wi = original_data.get(word, None)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)
Author: bloggse, Project: spambayes-lite, Lines: 44, Source: test_sb_dbexpimp.py

Example 10: test_import_to_dbm

 def test_import_to_dbm(self):
     # Create a CSV file to import.
     temp = open(TEMP_CSV_NAME, "wb")
     temp.write("3,4\n")
     csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                 "of":(1,0), "the":(1,2), "import":(3,1)}
     for word, (ham, spam) in csv_data.items():
         temp.write("%s,%s,%s\n" % (word, ham, spam))
     temp.close()
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file).
     bayes = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes.nham, 3)
     self.assertEqual(bayes.nspam, 4)
     for word, (ham, spam) in csv_data.items():
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes._wordinfokeys())
         wi = bayes._wordinfoget(word)
         self.assertEqual(wi.hamcount, ham)
         self.assertEqual(wi.spamcount, spam)
Author: bloggse, Project: spambayes-lite, Lines: 21, Source: test_sb_dbexpimp.py
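Taken together, the import/export tests above pin down the CSV layout that sb_dbexpimp expects: a header row holding nham,nspam followed by one word,hamcount,spamcount row per token. As a hedged sketch, building and importing such a file by hand could look like this; the file names and counts are invented, and sb_dbexpimp is assumed to be importable, as it is in the test module.

# Hypothetical: write a tiny training CSV in the layout used above, then
# import it into a fresh dbm store.  File names and counts are made up.
import sb_dbexpimp
from spambayes.storage import open_storage

counts = {"meeting": (2, 0), "offer": (0, 3)}   # word -> (hamcount, spamcount)
fp = open("mini.csv", "wb")
fp.write("2,3\n")                               # nham, nspam header row
for word, (ham, spam) in counts.items():
    fp.write("%s,%d,%d\n" % (word, ham, spam))
fp.close()

sb_dbexpimp.runImport("mini.db", "dbm", True, "mini.csv")   # True: start from scratch
bayes = open_storage("mini.db", "dbm")
print bayes.nham, bayes.nspam                   # expect 2 and 3
bayes.close()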

Example 11: hammer

def hammer():
    """Trains and classifies repeatedly."""
    global bayes
    wellFlushed = False
    for i in range(1, 1000000):
        # Train.
        isSpam = random.choice([True, False])
        train(makeMessage(isSpam), isSpam)

        # Every thousand messages or so, flush the DB to disk.
        if random.randrange(1000) == 1:
            print "Flushing."
            bayes.store()
            if i > 500:
                wellFlushed = True

        # Classify.
        isSpam = random.choice([True, False])
        prob = classify(makeMessage(isSpam))
        if i < 10 or i % 100 == 0:
            print "%6.6d: %d, %.4f" % (i, isSpam, prob)

        # Every thousand messages or so, reopen the DB without closing it.
        # The way this works will open the new instance before the existing
        # one goes away, which can cause a DBRunRecoveryError.  Versions up
        # to 1.0a5 had a bug that did this, but people were still
        # reporting DBRunRecoveryErrors in 1.0a6, so I don't think we can
        # call it fixed.

        # We don't do this within the first few hundred messages, or before
        # the DB has been flushed, because that can give a "hamcount > nham"
        # error.  Despite this, you still see those errors.  Either I've got
        # something badly wrong, or they're the result of corrupt databases
        # that aren't caught by bsddb and turned into DBRunRecoveryErrors.
        if wellFlushed and random.randrange(1000) == 1:
            print "Re-opening."
            bayes = storage.open_storage(FILENAME, True)
Author: Xodarap, Project: Eipi, Lines: 37, Source: hammer.py

Example 12: usage

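    # Note: this snippet appears to be an excerpt from main() in
    # sb_xmlrpcserver.py; usage(), Options and ReusableSimpleXMLRPCServer are
    # defined or imported elsewhere in that file.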
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:p:o:")
    except getopt.error, msg:
        usage(2, msg)

    options = Options.options

    for opt, arg in opts:
        if opt == "-h":
            usage(0)
        elif opt == "-o":
            options.set_from_cmdline(arg, sys.stderr)
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    ip, port = args[0].split(":")
    port = int(port)

    bayes = storage.open_storage(dbname, usedb)
    h = XMLHammie(bayes)

    server = ReusableSimpleXMLRPCServer((ip, port), SimpleXMLRPCServer.SimpleXMLRPCRequestHandler)
    server.register_instance(h)
    server.serve_forever()


if __name__ == "__main__":
    main()
Author: ems316, Project: Active-Machine-Unlearning, Lines: 30, Source: sb_xmlrpcserver.py

Example 13: train

__author__ = "Richie Hindle <[email protected]>"

headerTemplate = """To: %(To)s
From: %(From)s
Subject: %(Subject)s
Date: %(Date)s

"""

# Create a fresh bayes object to train and classify.
FILENAME = "__hammer.db"
try:
    os.remove(FILENAME)
except OSError:
    pass
bayes = storage.open_storage(FILENAME, True)


def train(text, isSpam):
    """Trains the classifier on the given text."""
    tokens = tokenizer.tokenize(text)
    bayes.learn(tokens, isSpam)


def classify(text):
    """Classifies the given text, returning the spamprob."""
    tokens = tokenizer.tokenize(text)
    return bayes.spamprob(tokens)


def makeMessage(isSpam):
Author: Xodarap, Project: Eipi, Lines: 31, Source: hammer.py
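For completeness, here is a hedged example of driving the two helpers above directly, outside of hammer(); the header fields and message text are invented.

# Hypothetical one-off use of train()/classify() from the snippet above.
msg = headerTemplate % {"To": "user@example.com", "From": "spammer@example.com",
                        "Subject": "Cheap meds", "Date": "Mon, 1 Jan 2007"}
msg += "Limited time offer, buy now!"
train(msg, True)        # train the classifier on this text as spam
print classify(msg)     # spam probability of the same text after training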

Example 14: open_spamdb

 def open_spamdb(self, request):
     if self.sbayes is None:
         event_log = request.rootpage.getPagePath('event-log', isfile=1)
         spam_db = os.path.join(os.path.dirname(event_log), self.spam_db)
         self.sbayes = Hammie(storage.open_storage(spam_db, "pickle", 'c'))
         atexit.register(self.close_spamdb)
Author: GymWenFLL, Project: tpp_libs, Lines: 6, Source: MoinSecurityPolicy.py


Note: The spambayes.storage.open_storage examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers; copyright in the source code remains with the original authors. Please consult the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.