当前位置: 首页>>代码示例>>Python>>正文


Python treebank.tagged_sents方法代码示例

本文整理汇总了Python中nltk.corpus.treebank.tagged_sents方法的典型用法代码示例。如果您正苦于以下问题:Python treebank.tagged_sents方法的具体用法?Python treebank.tagged_sents怎么用?Python treebank.tagged_sents使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.corpus.treebank的用法示例。


在下文中一共展示了treebank.tagged_sents方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: demo

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo():
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j+100]
        for i in range(len(s)):
            print(s[i],'--', t[i])
        print() 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:21,代码来源:tnt.py

示例2: _demo_prepare_data

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def _demo_prepare_data(tagged_data, train, num_sents, randomize, separate_baseline_data):
    # train is the proportion of data used in training; the rest is reserved
    # for testing.
    if tagged_data is None:
        print("Loading tagged data from treebank... ")
        tagged_data = treebank.tagged_sents()
    if num_sents is None or len(tagged_data) <= num_sents:
        num_sents = len(tagged_data)
    if randomize:
        random.seed(len(tagged_data))
        random.shuffle(tagged_data)
    cutoff = int(num_sents * train)
    training_data = tagged_data[:cutoff]
    gold_data = tagged_data[cutoff:num_sents]
    testing_data = [[t[0] for t in sent] for sent in gold_data]
    if not separate_baseline_data:
        baseline_data = training_data
    else:
        bl_cutoff = len(training_data) // 3
        (baseline_data, training_data) = (training_data[:bl_cutoff], training_data[bl_cutoff:])
    (trainseqs, traintokens) = corpus_size(training_data)
    (testseqs, testtokens) = corpus_size(testing_data)
    (bltrainseqs, bltraintokens) = corpus_size(baseline_data)
    print("Read testing data ({0:d} sents/{1:d} wds)".format(testseqs, testtokens))
    print("Read training data ({0:d} sents/{1:d} wds)".format(trainseqs, traintokens))
    print("Read baseline data ({0:d} sents/{1:d} wds) {2:s}".format(
        bltrainseqs, bltraintokens, "" if separate_baseline_data else "[reused the training set]"))
    return (training_data, baseline_data, gold_data, testing_data) 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:30,代码来源:demo.py

示例3: demo2

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo2():
    from nltk.corpus import treebank

    d = list(treebank.tagged_sents())

    t = TnT(N=1000, C=False)
    s = TnT(N=1000, C=True)
    t.train(d[(11)*100:])
    s.train(d[(11)*100:])

    for i in range(10):
        tacc = t.evaluate(d[i*100:((i+1)*100)])
        tp_un = t.unknown / (t.known + t.unknown)
        tp_kn = t.known / (t.known + t.unknown)
        t.unknown = 0
        t.known = 0

        print('Capitalization off:')
        print('Accuracy:', tacc)
        print('Percentage known:', tp_kn)
        print('Percentage unknown:', tp_un)
        print('Accuracy over known words:', (tacc / tp_kn))

        sacc = s.evaluate(d[i*100:((i+1)*100)])
        sp_un = s.unknown / (s.known + s.unknown)
        sp_kn = s.known / (s.known + s.unknown)
        s.unknown = 0
        s.known = 0

        print('Capitalization on:')
        print('Accuracy:', sacc)
        print('Percentage known:', sp_kn)
        print('Percentage unknown:', sp_un)
        print('Accuracy over known words:', (sacc / sp_kn)) 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:36,代码来源:tnt.py

示例4: demo2

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo2():
    from nltk.corpus import treebank

    d = list(treebank.tagged_sents())

    t = TnT(N=1000, C=False)
    s = TnT(N=1000, C=True)
    t.train(d[(11)*100:])
    s.train(d[(11)*100:])

    for i in range(10):
        tacc = t.evaluate(d[i*100:((i+1)*100)])
        tp_un = float(t.unknown) / float(t.known +t.unknown)
        tp_kn = float(t.known) / float(t.known + t.unknown)
        t.unknown = 0
        t.known = 0

        print('Capitalization off:')
        print('Accuracy:', tacc)
        print('Percentage known:', tp_kn)
        print('Percentage unknown:', tp_un)
        print('Accuracy over known words:', (tacc / tp_kn))

        sacc = s.evaluate(d[i*100:((i+1)*100)])
        sp_un = float(s.unknown) / float(s.known +s.unknown)
        sp_kn = float(s.known) / float(s.known + s.unknown)
        s.unknown = 0
        s.known = 0

        print('Capitalization on:')
        print('Accuracy:', sacc)
        print('Percentage known:', sp_kn)
        print('Percentage unknown:', sp_un)
        print('Accuracy over known words:', (sacc / sp_kn)) 
开发者ID:jarrellmark,项目名称:neighborhood_mood_aws,代码行数:36,代码来源:tnt.py

示例5: demo3

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo3():
    from nltk.corpus import treebank, brown

    d = list(treebank.tagged_sents())
    e = list(brown.tagged_sents())

    d = d[:1000]
    e = e[:1000]

    d10 = int(len(d)*0.1)
    e10 = int(len(e)*0.1)

    tknacc = 0
    sknacc = 0
    tallacc = 0
    sallacc = 0
    tknown = 0
    sknown = 0

    for i in range(10):

        t = TnT(N=1000, C=False)
        s = TnT(N=1000, C=False)

        dtest = d[(i*d10):((i+1)*d10)]
        etest = e[(i*e10):((i+1)*e10)]

        dtrain = d[:(i*d10)] + d[((i+1)*d10):]
        etrain = e[:(i*e10)] + e[((i+1)*e10):]

        t.train(dtrain)
        s.train(etrain)

        tacc = t.evaluate(dtest)
        tp_un = t.unknown / (t.known + t.unknown)
        tp_kn = t.known / (t.known + t.unknown)
        tknown += tp_kn
        t.unknown = 0
        t.known = 0

        sacc = s.evaluate(etest)
        sp_un = s.unknown / (s.known + s.unknown)
        sp_kn = s.known / (s.known + s.unknown)
        sknown += sp_kn
        s.unknown = 0
        s.known = 0

        tknacc += (tacc / tp_kn)
        sknacc += (sacc / tp_kn)
        tallacc += tacc
        sallacc += sacc

        #print i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc


    print("brown: acc over words known:", 10 * tknacc)
    print("     : overall accuracy:", 10 * tallacc)
    print("     : words known:", 10 * tknown)
    print("treebank: acc over words known:", 10 * sknacc)
    print("        : overall accuracy:", 10 * sallacc)
    print("        : words known:", 10 * sknown) 
开发者ID:Thejas-1,项目名称:Price-Comparator,代码行数:63,代码来源:tnt.py

示例6: demo3

# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo3():
    from nltk.corpus import treebank, brown

    d = list(treebank.tagged_sents())
    e = list(brown.tagged_sents())

    d = d[:1000]
    e = e[:1000]

    d10 = int(len(d)*0.1)
    e10 = int(len(e)*0.1)

    tknacc = 0
    sknacc = 0
    tallacc = 0
    sallacc = 0
    tknown = 0
    sknown = 0

    for i in range(10):

        t = TnT(N=1000, C=False)
        s = TnT(N=1000, C=False)

        dtest = d[(i*d10):((i+1)*d10)]
        etest = e[(i*e10):((i+1)*e10)]

        dtrain = d[:(i*d10)] + d[((i+1)*d10):]
        etrain = e[:(i*e10)] + e[((i+1)*e10):]

        t.train(dtrain)
        s.train(etrain)

        tacc = t.evaluate(dtest)
        tp_un = float(t.unknown) / float(t.known +t.unknown)
        tp_kn = float(t.known) / float(t.known + t.unknown)
        tknown += tp_kn
        t.unknown = 0
        t.known = 0

        sacc = s.evaluate(etest)
        sp_un = float(s.unknown) / float(s.known + s.unknown)
        sp_kn = float(s.known) / float(s.known + s.unknown)
        sknown += sp_kn
        s.unknown = 0
        s.known = 0

        tknacc += (tacc / tp_kn)
        sknacc += (sacc / tp_kn)
        tallacc += tacc
        sallacc += sacc

        #print i+1, (tacc / tp_kn), i+1, (sacc / tp_kn), i+1, tacc, i+1, sacc


    print("brown: acc over words known:", 10 * tknacc)
    print("     : overall accuracy:", 10 * tallacc)
    print("     : words known:", 10 * tknown)
    print("treebank: acc over words known:", 10 * sknacc)
    print("        : overall accuracy:", 10 * sallacc)
    print("        : words known:", 10 * sknown) 
开发者ID:jarrellmark,项目名称:neighborhood_mood_aws,代码行数:63,代码来源:tnt.py


注:本文中的nltk.corpus.treebank.tagged_sents方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。