本文整理汇总了Python中nltk.corpus.treebank.tagged_sents方法的典型用法代码示例。如果您正苦于以下问题：Python treebank.tagged_sents方法的具体用法？Python treebank.tagged_sents怎么用？Python treebank.tagged_sents使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.corpus.treebank的用法示例。
在下文中一共展示了treebank.tagged_sents方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: demo
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo():
    """Train a TnT tagger on Brown and print its output next to the gold tags.

    The tagger is trained on tagged Brown sentences 200-999, then asked to
    tag the untagged sentences 100-119.  For every tagged sentence, each
    item produced by the tagger is printed beside the item at the same
    position in the corresponding gold sentence, followed by a blank line.
    """
    from nltk.corpus import brown

    gold_sents = list(brown.tagged_sents())
    raw_sents = list(brown.sents())

    # Build the tagger and train it on a slice of the gold data.
    tagger = TnT()
    tagger.train(gold_sents[200:1000])

    # Tag a slice of the untagged sentences.
    tagged_data = tagger.tagdata(raw_sents[100:120])

    # Show prediction and reference side by side; the +100 offset lines the
    # gold sentence up with the slice that was tagged above.
    for offset, predicted in enumerate(tagged_data):
        reference = gold_sents[offset + 100]
        for position, guess in enumerate(predicted):
            print(guess, '--', reference[position])
        print()
示例2: _demo_prepare_data
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def _demo_prepare_data(tagged_data, train, num_sents, randomize, separate_baseline_data):
    """Split tagged sentences into training, baseline, gold and testing sets.

    Args:
        tagged_data: Sequence of tagged sentences; each sentence is a
            sequence of items whose first element is the token.  When
            ``None``, ``treebank.tagged_sents()`` is loaded instead.
        train: Proportion of the selected sentences used for training; the
            rest is reserved for testing.
        num_sents: Maximum number of sentences to use.  ``None``, or a value
            at least as large as the data, selects every sentence.
        randomize: When true, shuffle the sentences before splitting.  The
            module-level RNG is seeded with the number of sentences, so the
            shuffle is reproducible for a given data size.
        separate_baseline_data: When true, the first third of the training
            split is carved off as baseline data; otherwise the baseline
            data is the training data itself.

    Returns:
        A tuple ``(training_data, baseline_data, gold_data, testing_data)``
        where ``testing_data`` is ``gold_data`` with the tags stripped.
    """
    if tagged_data is None:
        print("Loading tagged data from treebank... ")
        tagged_data = treebank.tagged_sents()
    if num_sents is None or len(tagged_data) <= num_sents:
        num_sents = len(tagged_data)
    if randomize:
        # Shuffle a private list copy: random.shuffle() needs a mutable
        # sequence (the corpus view returned by treebank.tagged_sents() is
        # presumably a read-only lazy view), and the caller's own data
        # should not be reordered as a side effect.
        tagged_data = list(tagged_data)
        random.seed(len(tagged_data))
        random.shuffle(tagged_data)
    cutoff = int(num_sents * train)
    training_data = tagged_data[:cutoff]
    gold_data = tagged_data[cutoff:num_sents]
    # Testing data keeps only the first element (the token) of every item.
    testing_data = [[t[0] for t in sent] for sent in gold_data]
    if not separate_baseline_data:
        baseline_data = training_data
    else:
        bl_cutoff = len(training_data) // 3
        (baseline_data, training_data) = (training_data[:bl_cutoff], training_data[bl_cutoff:])
    # corpus_size is unpacked as a (sentence count, token count) pair.
    (trainseqs, traintokens) = corpus_size(training_data)
    (testseqs, testtokens) = corpus_size(testing_data)
    (bltrainseqs, bltraintokens) = corpus_size(baseline_data)
    print("Read testing data ({0:d} sents/{1:d} wds)".format(testseqs, testtokens))
    print("Read training data ({0:d} sents/{1:d} wds)".format(trainseqs, traintokens))
    print("Read baseline data ({0:d} sents/{1:d} wds) {2:s}".format(
        bltrainseqs, bltraintokens, "" if separate_baseline_data else "[reused the training set]"))
    return (training_data, baseline_data, gold_data, testing_data)
示例3: demo2
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo2():
    """Compare two TnT taggers built with C=False and C=True on the treebank sample.

    Both taggers are trained on the tagged sentences from index 1100
    onwards and evaluated on ten consecutive 100-sentence slices covering
    sentences 0-999.  For each slice and each tagger the accuracy, the
    fraction of known and unknown words, and the accuracy divided by the
    known fraction are printed (the labels say "Percentage" but the values
    are fractions).  The taggers' known/unknown counters are reset after
    every evaluation.
    """
    from nltk.corpus import treebank

    sents = list(treebank.tagged_sents())

    without_caps = TnT(N=1000, C=False)
    with_caps = TnT(N=1000, C=True)
    without_caps.train(sents[(11)*100:])
    with_caps.train(sents[(11)*100:])

    # Order matters: the C=False tagger is always reported first.
    runs = (
        ('Capitalization off:', without_caps),
        ('Capitalization on:', with_caps),
    )

    for fold in range(10):
        for heading, tagger in runs:
            accuracy = tagger.evaluate(sents[fold*100:((fold+1)*100)])
            seen = tagger.known + tagger.unknown
            unknown_share = tagger.unknown / seen
            known_share = tagger.known / seen
            # Clear the counters so the next slice starts from zero.
            tagger.unknown = 0
            tagger.known = 0

            print(heading)
            print('Accuracy:', accuracy)
            print('Percentage known:', known_share)
            print('Percentage unknown:', unknown_share)
            print('Accuracy over known words:', (accuracy / known_share))
示例4: demo2
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo2():
    """Compare two TnT taggers built with C=False and C=True on the treebank sample.

    Both taggers are trained on the tagged sentences from index 1100
    onwards and evaluated on ten consecutive 100-sentence slices covering
    sentences 0-999.  For each slice, the C=False tagger is reported first
    and the C=True tagger second: accuracy, fraction of known words,
    fraction of unknown words, and accuracy divided by the known fraction.
    Counts are converted with float() before dividing so the ratios are
    fractional even under integer-division semantics.
    """
    from nltk.corpus import treebank

    def report(heading, tagger, held_out):
        # Evaluate one tagger on one slice, reset its counters, print stats.
        score = tagger.evaluate(held_out)
        total = float(tagger.known + tagger.unknown)
        unknown_share = float(tagger.unknown) / total
        known_share = float(tagger.known) / total
        tagger.unknown = 0
        tagger.known = 0
        print(heading)
        print('Accuracy:', score)
        print('Percentage known:', known_share)
        print('Percentage unknown:', unknown_share)
        print('Accuracy over known words:', (score / known_share))

    corpus = list(treebank.tagged_sents())

    lower_tagger = TnT(N=1000, C=False)
    caps_tagger = TnT(N=1000, C=True)
    lower_tagger.train(corpus[(11)*100:])
    caps_tagger.train(corpus[(11)*100:])

    for block in range(10):
        begin = block * 100
        end = (block + 1) * 100
        report('Capitalization off:', lower_tagger, corpus[begin:end])
        report('Capitalization on:', caps_tagger, corpus[begin:end])
示例5: demo3
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo3():
    """Run 10-fold cross-validation of TnT on treebank and Brown samples.

    The first 1000 tagged sentences of each corpus are split into ten
    equal folds.  For every fold a fresh tagger is trained on the other
    nine folds and evaluated on the held-out one.  Three per-fold figures
    are summed for each corpus: accuracy, fraction of known words, and
    accuracy divided by the fraction of known words.  The sums are printed
    multiplied by 10, i.e. the mean over the ten folds expressed as a
    percentage.
    """
    from itertools import islice

    from nltk.corpus import treebank, brown

    # Only the first 1000 sentences are used, so avoid reading whole corpora.
    d = list(islice(treebank.tagged_sents(), 1000))
    e = list(islice(brown.tagged_sents(), 1000))

    d10 = len(d) // 10
    e10 = len(e) // 10

    # t* accumulators belong to the treebank tagger, s* to the Brown tagger.
    tknacc = 0
    sknacc = 0
    tallacc = 0
    sallacc = 0
    tknown = 0
    sknown = 0

    for i in range(10):
        t = TnT(N=1000, C=False)
        s = TnT(N=1000, C=False)

        dtest = d[(i*d10):((i+1)*d10)]
        etest = e[(i*e10):((i+1)*e10)]

        dtrain = d[:(i*d10)] + d[((i+1)*d10):]
        etrain = e[:(i*e10)] + e[((i+1)*e10):]

        t.train(dtrain)
        s.train(etrain)

        tacc = t.evaluate(dtest)
        tp_kn = t.known / (t.known + t.unknown)
        tknown += tp_kn

        sacc = s.evaluate(etest)
        sp_kn = s.known / (s.known + s.unknown)
        sknown += sp_kn

        # Each accuracy is normalised by its own tagger's known-word
        # fraction (sp_kn for s, not tp_kn).
        tknacc += (tacc / tp_kn)
        sknacc += (sacc / sp_kn)
        tallacc += tacc
        sallacc += sacc

    # t was trained and evaluated on treebank, s on Brown; label accordingly.
    print("treebank: acc over words known:", 10 * tknacc)
    print(" : overall accuracy:", 10 * tallacc)
    print(" : words known:", 10 * tknown)
    print("brown: acc over words known:", 10 * sknacc)
    print(" : overall accuracy:", 10 * sallacc)
    print(" : words known:", 10 * sknown)
示例6: demo3
# 需要导入模块: from nltk.corpus import treebank [as 别名]
# 或者: from nltk.corpus.treebank import tagged_sents [as 别名]
def demo3():
    """Run 10-fold cross-validation of TnT on treebank and Brown samples.

    The first 1000 tagged sentences of each corpus are split into ten
    equal folds.  For every fold a fresh tagger is trained on the other
    nine folds and evaluated on the held-out one.  Three per-fold figures
    are summed for each corpus: accuracy, fraction of known words, and
    accuracy divided by the fraction of known words.  The sums are printed
    multiplied by 10, i.e. the mean over the ten folds expressed as a
    percentage.  Counts are converted with float() before dividing so the
    ratios are fractional even under integer-division semantics.
    """
    from itertools import islice

    from nltk.corpus import treebank, brown

    # Only the first 1000 sentences are used, so avoid reading whole corpora.
    d = list(islice(treebank.tagged_sents(), 1000))
    e = list(islice(brown.tagged_sents(), 1000))

    d10 = len(d) // 10
    e10 = len(e) // 10

    # t* accumulators belong to the treebank tagger, s* to the Brown tagger.
    tknacc = 0
    sknacc = 0
    tallacc = 0
    sallacc = 0
    tknown = 0
    sknown = 0

    for i in range(10):
        t = TnT(N=1000, C=False)
        s = TnT(N=1000, C=False)

        dtest = d[(i*d10):((i+1)*d10)]
        etest = e[(i*e10):((i+1)*e10)]

        dtrain = d[:(i*d10)] + d[((i+1)*d10):]
        etrain = e[:(i*e10)] + e[((i+1)*e10):]

        t.train(dtrain)
        s.train(etrain)

        tacc = t.evaluate(dtest)
        tp_kn = float(t.known) / float(t.known + t.unknown)
        tknown += tp_kn

        sacc = s.evaluate(etest)
        sp_kn = float(s.known) / float(s.known + s.unknown)
        sknown += sp_kn

        # Each accuracy is normalised by its own tagger's known-word
        # fraction (sp_kn for s, not tp_kn).
        tknacc += (tacc / tp_kn)
        sknacc += (sacc / sp_kn)
        tallacc += tacc
        sallacc += sacc

    # t was trained and evaluated on treebank, s on Brown; label accordingly.
    print("treebank: acc over words known:", 10 * tknacc)
    print(" : overall accuracy:", 10 * tallacc)
    print(" : words known:", 10 * tknown)
    print("brown: acc over words known:", 10 * sknacc)
    print(" : overall accuracy:", 10 * sallacc)
    print(" : words known:", 10 * sknown)