当前位置: 首页>>代码示例>>Python>>正文


Python mboxutils.getmbox函数代码示例

本文整理汇总了Python中spambayes.mboxutils.getmbox函数的典型用法代码示例。如果您正苦于以下问题:Python getmbox函数的具体用法?Python getmbox怎么用?Python getmbox使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了getmbox函数的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: score

def score(h, msgs, reverse=0):
    """Score (judge) all messages from a mailbox."""
    # XXX The reporting needs work!
    mbox = mboxutils.getmbox(msgs)
    i = 0
    spams = hams = unsures = 0
    for msg in mbox:
        i += 1
        prob, clues = h.score(msg, True)
        if hasattr(msg, '_mh_msgno'):
            msgno = msg._mh_msgno
        else:
            msgno = i
        isspam = (prob >= SPAM_THRESHOLD)
        isham = (prob <= HAM_THRESHOLD)
        if isspam:
            spams += 1
            if not reverse:
                print "%6s %4.2f %1s" % (msgno, prob, isspam and "S" or "."),
                print h.formatclues(clues)
        elif isham:
            hams += 1
            if reverse:
                print "%6s %4.2f %1s" % (msgno, prob, isham and "S" or "."),
                print h.formatclues(clues)
        else:
            unsures += 1
            print "%6s %4.2f U" % (msgno, prob),
            print h.formatclues(clues)
    return (spams, hams, unsures)
开发者ID:bloggse,项目名称:spambayes-lite,代码行数:30,代码来源:hammiebulk.py

示例2: cull

def cull(mbox_name, cullext, designation, tdict):
    """Reduce a mailbox to the messages whose message-id is in ``tdict``.

    mbox_name   -- mailbox specification handed to ``mboxutils.getmbox``.
    cullext     -- when true, a suffix appended to ``mbox_name`` to name the
                   file that receives the kept messages.  When false, no
                   file is written; instead every message that is NOT in
                   ``tdict`` is flagged ``\\Deleted \\Seen`` through its
                   ``imap_server`` attribute (this path assumes the messages
                   expose ``imap_server`` and ``uid`` -- TODO confirm against
                   the caller).
    designation -- label used only in the progress message.
    tdict       -- container of message-ids to keep.

    A running "kept of seen" counter is written to stdout.
    """
    print("writing new %s mbox..." % designation)
    n = m = 0
    culled_mbox = None
    if cullext:
        culled_mbox = open(mbox_name + cullext, "w")

    # The output file must be closed even if reading the mailbox, writing a
    # message or the IMAP round-trip raises, so the loop runs under finally.
    try:
        for msg in mboxutils.getmbox(mbox_name):
            m += 1
            if msg["message-id"] in tdict:
                if culled_mbox is not None:
                    culled_mbox.write(str(msg))
                n += 1
            elif culled_mbox is None:
                response = msg.imap_server.uid(
                    "STORE", msg.uid, "+FLAGS.SILENT", "(\\Deleted \\Seen)")
                command = "set %s to be deleted and seen" % (msg.uid,)
                msg.imap_server.check_response(command, response)

            sys.stdout.write("\r%5d of %5d" % (n, m))
            sys.stdout.flush()

        sys.stdout.write("\n")
    finally:
        if culled_mbox is not None:
            culled_mbox.close()
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:25,代码来源:tte.py

示例3: mapmessages

def mapmessages(f, mboxtype, mapdb):
    """Record which messages of mailbox ``f`` contain each token.

    ``mapdb`` maps a token to a ``(ham, spam)`` pair of dicts; each dict is
    keyed by mailbox name and holds a set of message-ids.  For every message
    in ``getmbox(f)`` that has a message-id, each token from ``tokenize`` is
    filed under the ham dict when ``mboxtype == "ham"`` and under the spam
    dict otherwise.  When the ``x-use_bigrams`` classifier option is on, the
    enhanced word stream of the message is filed the same way.

    A running message count is written to stdout.
    """
    file_as_ham = mboxtype == "ham"

    def record(tokens, msgid):
        # Read-modify-write each entry so that mapdb sees an explicit
        # assignment for every token touched.
        for token in tokens:
            ham, spam = mapdb.get(token, ({}, {}))
            bucket = ham if file_as_ham else spam
            bucket.setdefault(f, set()).add(msgid)
            mapdb[token] = (ham, spam)

    for count, msg in enumerate(getmbox(f), 1):
        sys.stdout.write('\r%s: %d' % (f, count))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is not None:
            record(tokenize(msg), msgid)
            if options["Classifier", "x-use_bigrams"]:
                record(Classifier()._enhance_wordstream(tokenize(msg)), msgid)
    sys.stdout.write("\n")
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:33,代码来源:mkreversemap.py

示例4: extractmessages

def extractmessages(features, mapdb, hamfile, spamfile):
    """Write out the messages that contain any of the given features.

    ``mapdb`` maps a feature to a ``(ham, spam)`` pair of dicts keyed by
    mailbox name, each holding a set of message-ids.  The message-ids for
    all ``features`` are merged per mailbox; ham ids are only collected when
    ``hamfile`` is not None and spam ids only when ``spamfile`` is not None.
    Each mailbox that ended up with wanted ids is then re-read with
    ``getmbox`` and its matching messages are printed to the ham or spam
    file, with a running count on stdout.
    """
    def gather(collected, per_mbox):
        # Merge one feature's {mailbox: ids} mapping into the running total.
        for mbox in per_mbox:
            collected.setdefault(mbox, set()).update(per_mbox[mbox])

    def dump(wanted, outfile):
        # Re-read every mailbox and emit the messages whose id was wanted.
        for mailfile in wanted:
            msgids = wanted[mailfile]
            written = 0
            for msg in getmbox(mailfile):
                if msg.get("message-id") not in msgids:
                    continue
                written += 1
                sys.stdout.write('\r%s: %5d' % (mailfile, written))
                sys.stdout.flush()
                print >> outfile, msg
        print

    hamids, spamids = {}, {}
    for feature in features:
        ham, spam = mapdb.get(feature, ([], []))
        if hamfile is not None:
            gather(hamids, ham)
        if spamfile is not None:
            gather(spamids, spam)

    # now run through each mailbox in hamids and spamids and print
    # matching messages to relevant ham or spam files
    dump(hamids, hamfile)
    dump(spamids, spamfile)
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:41,代码来源:extractmessages.py

示例5: main

def main():
    """Command-line entry point: run the requested actions over mailboxes.

    Options select which ``HammieFilter`` methods run on each message
    (``-f`` filter, ``-g``/``-s`` train ham/spam, ``-t`` filter and train,
    ``-G``/``-S`` untrain ham/spam); with none given, ``filter`` is used.
    ``-n`` creates a new database and exits.  ``-o`` forwards an option
    override to ``Options.options``.  The database name and type are taken
    from the parsed options by ``storage.database_type``.

    Each remaining argument is opened with ``mboxutils.getmbox`` (``"-"``
    when there are no arguments) and every message is written to stdout
    after all actions have run on it.
    """
    h = HammieFilter()
    # Switches that queue a HammieFilter method, looked up by name only when
    # the switch is actually present on the command line.
    action_names = {
        '-f': 'filter',
        '-g': 'train_ham',
        '-s': 'train_spam',
        '-t': 'filter_train',
        '-G': 'untrain_ham',
        '-S': 'untrain_spam',
    }
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                               ['help', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt in action_names:
            actions.append(getattr(h, action_names[opt]))
        elif opt == "-n":
            create_newdb = True
    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    if not actions:
        actions = [h.filter]

    if not args:
        args = ["-"]
    # The argument list does not change below, so test it once.
    only_dash = args == ["-"]

    for fname in args:
        for msg in mboxutils.getmbox(fname):
            for action in actions:
                action(msg)
            # Decide the envelope line once per message, after every action
            # has run.  When the sole argument is "-" it is kept only if the
            # message has one; otherwise it is always requested.
            if only_dash:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
开发者ID:Xodarap,项目名称:Eipi,代码行数:49,代码来源:sb_filter.py

示例6: train

def train(store, ham, spam, maxmsgs, maxrounds, tdict, reverse, verbose):
    smisses = hmisses = round = 0
    ham_cutoff = Options.options["Categorization", "ham_cutoff"]
    spam_cutoff = Options.options["Categorization", "spam_cutoff"]

    while round < maxrounds and (hmisses or smisses or round == 0):
        hambone = mboxutils.getmbox(ham)
        spamcan = mboxutils.getmbox(spam)
        if reverse:
            hambone = reversed(list(hambone))
            spamcan = reversed(list(spamcan))
        round += 1

        if verbose:
            print >> sys.stderr, "*** round", round, "***"

        hmisses = smisses = nmsgs = 0
        start = datetime.datetime.now()
        try:
            while not maxmsgs or nmsgs < maxmsgs:
                hammsg = hambone.next()
                spammsg = spamcan.next()

                nmsgs += 2
                sys.stdout.write("\r%5d" % nmsgs)
                sys.stdout.flush()

                score = store.spamprob(tokenize(hammsg))
                if score > ham_cutoff:
                    if verbose:
                        print >> sys.stderr, "miss ham:  %.6f %s" % (score, hammsg["message-id"])
                    hmisses += 1
                    tdict[hammsg["message-id"]] = True
                    store.learn(tokenize(hammsg), False)

                score = store.spamprob(tokenize(spammsg))
                if score < spam_cutoff:
                    if verbose:
                        print >> sys.stderr, "miss spam: %.6f %s" % (score, spammsg["message-id"])
                    smisses += 1
                    tdict[spammsg["message-id"]] = True
                    store.learn(tokenize(spammsg), True)

        except StopIteration:
            pass

        delta = datetime.datetime.now()-start
        seconds = delta.seconds + delta.microseconds/1000000

        print "\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs" % \
              (round, nmsgs, hmisses, smisses, seconds)

    # We count all untrained messages so the user knows what was skipped.
    # We also tag them for saving so we don't lose messages which might have
    # value in a future run
    nhamleft = 0
    try:
        while True:
            msg = hambone.next()
            tdict[msg["message-id"]] = True
            nhamleft += 1
    except StopIteration:
        if nhamleft: print nhamleft, "untrained hams"

    nspamleft = 0
    try:
        while True:
            msg = spamcan.next()
            tdict[msg["message-id"]] = True
            nspamleft += 1
    except StopIteration:
        if nspamleft: print nspamleft, "untrained spams"
开发者ID:Xodarap,项目名称:Eipi,代码行数:72,代码来源:tte.py

示例7: pickle_read

    try:
        mapd = pickle_read(mapfile)
    except IOError:
        usage("Mapfile %s does not exist" % mapfile)
        return 1

    if not features and not args:
        usage("Require at least one feature (-f) arg or one message file")
        return 1

    if not features:
        # extract significant tokens from each message and identify
        # where they came from
        for f in args:
            for msg in getmbox(f):
                evidence = msg.get("X-Spambayes-Evidence", "")
                evidence = re.sub(r"\s+", " ", evidence)
                l = [e.rsplit(": ", 1)[0]
                     for e in evidence.split("; ")[2:]]
                for s in l:
                    try:
                        s = make_header(decode_header(s)).__unicode__()
                    except:
                        s = unicode(s, 'us-ascii', 'replace')
                    features.add(s)
        if not features:
            usage("No X-Spambayes-Evidence headers found")
            return 1

    if spamfile is not None:
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:30,代码来源:extractmessages.py

示例8: test_spambayes

def test_spambayes(iterations, timer, messages, ham_classifier):
    """Time ``iterations`` full scoring passes over ``messages``.

    ``timer`` is called immediately before and after each pass; the list of
    differences, one per pass, is returned.
    """
    # Prime the pump. This still leaves some hot functions uncompiled; these
    # will be noticed as hot during the timed loops below.
    for warmup_msg in messages:
        ham_classifier.score(warmup_msg)

    def timed_pass():
        # One complete pass over the mailbox, bracketed by timer readings.
        started = timer()
        for msg in messages:
            ham_classifier.score(msg)
        return timer() - started

    return [timed_pass() for _ in xrange(iterations)]


if __name__ == "__main__":
    # Script entry point: parse the standard benchmark options, load the
    # canned mailbox and the pickled classifier from the "data" directory
    # beside this file, then hand test_spambayes to the benchmark runner.
    parser = optparse.OptionParser(usage="%prog [options]",
                                   description="Run the SpamBayes benchmark.")
    util.add_standard_options_to(parser)
    options, args = parser.parse_args()

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    mailbox = os.path.join(data_dir, "spambayes_mailbox")
    ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")

    # Read the whole mailbox into a list up front; the same list is then
    # passed to every benchmark run.
    msgs = list(mboxutils.getmbox(mailbox))
    ham_classifier = hammie.open(ham_data, "pickle", "r")

    util.run_benchmark(
        options, options.num_runs, test_spambayes, msgs, ham_classifier)
开发者ID:bennn,项目名称:retic_performance,代码行数:30,代码来源:bm_spambayes.py

示例9: bench_spambayes

Run a canned mailbox through a SpamBayes ham/spam classifier.
"""

import os.path

import perf

from spambayes import hammie, mboxutils


__author__ = "[email protected] (Skip Montanaro)"
__contact__ = "[email protected] (Collin Winter)"


def bench_spambayes(ham_classifier, messages):
    """Score every message in ``messages`` once with ``ham_classifier``.

    The scores are discarded; the calls themselves are the work being timed.
    """
    for message in messages:
        ham_classifier.score(message)


if __name__ == "__main__":
    # Script entry point: load the canned mailbox and the pickled classifier
    # from the "data" directory beside this file, then let the perf runner
    # time bench_spambayes over them.
    runner = perf.Runner()
    runner.metadata['description'] = "Run the SpamBayes benchmark."

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    mailbox = os.path.join(data_dir, "spambayes_mailbox")
    ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")

    # Read the whole mailbox into a list up front; the same list is then
    # passed to the benchmark function.
    messages = list(mboxutils.getmbox(mailbox))
    ham_classifier = hammie.open(ham_data, "pickle", "r")

    runner.bench_func(
        'spambayes', bench_spambayes, ham_classifier, messages)
开发者ID:Yaspee,项目名称:performance,代码行数:30,代码来源:bm_spambayes.py

示例10: range

    outdirs = [outputbasepath + ("%d" % i) for i in range(1, n+1)]
    for dir in outdirs:
        if not os.path.isdir(dir):
            os.makedirs(dir)

    counter = 0
    cksums = set()
    skipped = 0
    for inputpath in inputpaths:
        if doglob:
            inpaths = glob.glob(inputpath)
        else:
            inpaths = [inputpath]

        for inpath in inpaths:
            mbox = mboxutils.getmbox(inpath)
            for msg in mbox:
                astext = str(msg)
                cksum = md5(astext).hexdigest()
                if delete_dups and cksum in cksums:
                    skipped += 1
                    continue
                cksums.add(cksum)
                i = random.randrange(n)
                #assert astext.endswith('\n')
                counter += 1
                msgfile = open('%s/%d' % (outdirs[i], counter), 'wb')
                msgfile.write(astext)
                msgfile.close()
                if verbose:
                    if counter % 100 == 0:
开发者ID:ehuelsmann,项目名称:spambayes,代码行数:31,代码来源:splitndirs.py

示例11: train

def train(store, hambox, spambox, maxmsgs, maxrounds, tdict, reverse, verbose,
          ratio):
    round = 0
    ham_cutoff = Options.options["Categorization", "ham_cutoff"]
    spam_cutoff = Options.options["Categorization", "spam_cutoff"]

    # list-ify ham and spam iterators immediately.  We don't really want to
    # fetch the messages multiple times, and this is no worse than what happened
    # before when -R was passed.
    hambone_ = list(mboxutils.getmbox(hambox))
    spamcan_ = list(mboxutils.getmbox(spambox))

    if reverse:
        hambone_ = list(reversed(hambone_))
        spamcan_ = list(reversed(spamcan_))
    
    nspam, nham = len(spamcan_), len(hambone_)
    if ratio:
        rspam, rham = ratio
        # If the actual ratio of spam to ham in the database is better than
        # what was asked for, use that better ratio.
        if (rspam > rham) == (rspam * nham > rham * nspam):
            rspam, rham = nspam, nham

    # define some indexing constants
    ham = 0
    spam = 1
    name = ('ham','spam')
    misses = [0, 0]

    misclassified = lambda is_spam, score: (
        is_spam and score < spam_cutoff or not is_spam and score > ham_cutoff)

    while round < maxrounds and (misses[ham] or misses[spam] or round == 0):
        round += 1
        if verbose:
            print >> sys.stderr, "*** round", round, "***"

        start = datetime.datetime.now()
        hambone = iter(hambone_)
        spamcan = iter(spamcan_)

        i = [0, 0]
        msgs_processed = 0
        misses = [0, 0]
        training_sets = [hambone, spamcan]

        while not maxmsgs or msgs_processed < maxmsgs:

            # should the next message come from hambone or spamcan?
            train_spam = i[ham] * rspam > i[spam] * rham

            try:
                train_msg = training_sets[train_spam].next()
            except StopIteration:
                break

            i[train_spam] += 1
            msgs_processed += 1
            sys.stdout.write("\r%5d" % msgs_processed)
            sys.stdout.flush()

            tokens = list(tokenize(train_msg))
            score = store.spamprob(tokens)
            selector = train_msg["message-id"] or train_msg["subject"]

            if misclassified(train_spam, score) and selector is not None:
                if verbose:
                    print >> sys.stderr, "\tmiss %s: %.6f %s" % (
                        name[train_spam], score, selector)

                misses[train_spam] += 1
                tdict[train_msg["message-id"]] = True
                store.learn(tokens, train_spam)

        delta = datetime.datetime.now()-start
        seconds = delta.seconds + delta.microseconds/1000000

        print "\rround: %2d, msgs: %4d, ham misses: %3d, spam misses: %3d, %.1fs" % \
              (round, msgs_processed, misses[0], misses[1], seconds)

    training_sets = [hambone, spamcan]
    
    # We count all untrained messages so the user knows what was skipped.
    # We also tag them for saving so we don't lose messages which might have
    # value in a future run
    for is_spam in ham, spam:
        nleft = 0
        try:
            while True:
                msg = training_sets[is_spam].next()
                score = store.spamprob(tokenize(msg))
                
                if misclassified(is_spam, score):
                    tdict[msg["message-id"]] = True
                    nleft += 1
                    
        except StopIteration:
            if nleft:
                print nleft, "untrained %ss" % name[is_spam]
开发者ID:ems316,项目名称:Active-Machine-Unlearning,代码行数:100,代码来源:tte.py


注:本文中的spambayes.mboxutils.getmbox函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。