This article collects typical usage examples of the Python method nltk.corpus.brown.sents. If you have been wondering how exactly to use brown.sents, what it does, or what real code calling it looks like, the curated examples below should help. You can also read further about the module it belongs to, nltk.corpus.brown.
The following shows 6 code examples of the brown.sents method, sorted by popularity by default.
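For orientation: brown.sents() returns the Brown corpus as tokenized sentences, i.e. a sequence where each element is one sentence given as a list of word strings. A minimal sketch (it assumes the corpus data has already been fetched once with nltk.download('brown')):

from nltk.corpus import brown

# Each element of brown.sents() is one sentence as a list of word strings.
sents = brown.sents()
print(len(sents))    # total number of sentences in the corpus
print(sents[0][:8])  # first few tokens of the first sentence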
Example 1: demo
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
def demo():
    from nltk.corpus import brown
    from nltk.tag import TnT  # TnT was missing from the original imports
    sents = list(brown.tagged_sents())
    test = list(brown.sents())
    # create and train the tagger on a slice of the tagged corpus
    tagger = TnT()
    tagger.train(sents[200:1000])
    # tag some data
    tagged_data = tagger.tagdata(test[100:120])
    # print results: each predicted (word, tag) pair next to the gold one
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
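Beyond eyeballing predicted/gold pairs, the trained tagger can also be scored directly. A short sketch using the standard TaggerI evaluation method (evaluate() in classic NLTK; newer releases expose the same metric as accuracy()):

from nltk.corpus import brown
from nltk.tag import TnT

sents = list(brown.tagged_sents())
tagger = TnT()
tagger.train(sents[200:1000])
# fraction of tokens tagged correctly on held-out gold sentences
print(tagger.evaluate(sents[100:200]))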
Example 2: info_content
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
import math

from nltk.corpus import brown

# Module-level state assumed by the original source:
N = 0             # total token count, filled in lazily
brown_freqs = {}  # word -> frequency in the Brown corpus

def info_content(lookup_word):
    """
    Uses the Brown corpus available in NLTK to calculate a Laplace
    smoothed frequency distribution of words, then uses this information
    to compute the information content of lookup_word.
    """
    global N
    if N == 0:
        # poor man's lazy evaluation: count every Brown token once
        for sent in brown.sents():
            for word in sent:
                word = word.lower()
                if word not in brown_freqs:
                    brown_freqs[word] = 0
                brown_freqs[word] = brown_freqs[word] + 1
                N = N + 1
    lookup_word = lookup_word.lower()
    n = 0 if lookup_word not in brown_freqs else brown_freqs[lookup_word]
    # Laplace smoothing: +1 on both the count and the total
    return 1.0 - (math.log(n + 1) / math.log(N + 1))
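A quick usage sketch (the word choices are illustrative; exact values depend on the corpus counts): frequent words yield low information content, while rare or unseen words score close to 1.0.

print(info_content('the'))     # very frequent in Brown -> low score
print(info_content('photon'))  # rare or absent -> close to 1.0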
Example 3: demo
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
def demo():
    from nltk.tag import tnt
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())
    # create and train the tagger
    tagger = tnt.TnT()
    tagger.train(sents[200:1000])
    # tag some data
    tagged_data = tagger.tagdata(test[100:120])
    # print results (the original used Python 2 print statements)
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
Example 4: demo
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
def demo():
    from nltk.corpus import brown
    from nltk.tag import TnT  # TnT was missing from the original imports
    sents = list(brown.tagged_sents())
    test = list(brown.sents())
    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])
    # tag some data
    tagged_data = tagger.tagdata(test[100:120])
    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print()
Example 5: load_data
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
import re

import numpy as np

def load_data(mode="train"):
    # load_vocab and the hyperparameter module `hp` (providing minlen and
    # maxlen) are defined elsewhere in the original repository.
    word2idx, idx2word = load_vocab()
    from nltk.corpus import brown
    sents = [" ".join(words) for words in brown.sents()]
    xs, ys = [], []
    for sent in sents:
        # keep only letters, apostrophes, and spaces
        sent = re.sub(r"[^ A-Za-z']", "", sent)
        if hp.minlen <= len(sent) <= hp.maxlen:
            x, y = [], []
            for word in sent.split():
                for char in word:
                    x.append(word2idx[char])
                    y.append(0)  # 0: no space
                y[-1] = 1  # space for end of a word
            y[-1] = 0  # no space for end of sentence
            # pad both sequences to hp.maxlen (x and y have equal length)
            xs.append(x + [0] * (hp.maxlen - len(x)))
            ys.append(y + [0] * (hp.maxlen - len(x)))
    # Convert to ndarrays
    X = np.array(xs, np.int32)
    Y = np.array(ys, np.int32)
    # mode: first 80% train, next 10% validation, last 10% test
    if mode == "train":
        X, Y = X[: int(len(X) * .8)], Y[: int(len(Y) * .8)]
        # X, Y = X[: 128], Y[: 128]
    elif mode == "val":
        X, Y = X[int(len(X) * .8): -int(len(X) * .1)], Y[int(len(X) * .8): -int(len(X) * .1)]
    else:
        X, Y = X[-int(len(X) * .1):], Y[-int(len(X) * .1):]
    return X, Y
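load_data relies on a load_vocab helper and an hp hyperparameter module from the surrounding repository, neither of which is shown on this page. A hypothetical sketch of such a helper, consistent with how it is used above (the exact character set is an assumption; index 0 doubles as the padding label):

def load_vocab():
    # Hypothetical vocabulary: a padding placeholder at index 0 followed by
    # the characters that survive the regex filter in load_data.
    vocab = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'"
    word2idx = {char: idx for idx, char in enumerate(vocab)}
    idx2word = {idx: char for idx, char in enumerate(vocab)}
    return word2idx, idx2word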
Example 6: test_clause
# Required import: from nltk.corpus import brown [as alias]
# Or: from nltk.corpus.brown import sents [as alias]
def test_clause():
    """
    Benchmark clause recognition over Brown sentences with nltk and pyrata.
    """
    # `size`, `iteration_number`, `measure_time`, and the two parse
    # functions below are module-level definitions in the original script.
    print('Measuring time performance on # {} sentences over # {} iterations for recognizing Clause'.format(size, iteration_number))
    from nltk.corpus import brown
    brown_sents = brown.sents()[:size]
    import nltk
    global brown_pos_tag_sents
    brown_pos_tag_sents = [nltk.pos_tag(sentence) for sentence in brown_sents]
    # ----------------------------------------------------
    # nltk_parser
    # ----------------------------------------------------
    analyzer_name = 'nltk_parser'
    times, averagetime, mintime = measure_time(nltk_parse_clause_in_the_whole_text, iteration_number)
    grammar = "clause"
    print('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))
    # ----------------------------------------------------
    # pyrata
    # ----------------------------------------------------
    analyzer_name = 'pyrata'
    global sentences_dict_list_list
    # pyrata consumes a list of sentences, each a list of token dicts
    sentences_dict_list_list = []
    for s in brown_pos_tag_sents:
        sentences_dict_list_list.append([{'raw': w, 'pos': p} for (w, p) in s])
    times, averagetime, mintime = measure_time(pyrata_recognize_clause_in_the_whole_text, iteration_number)
    grammar = "clause"
    print('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))
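test_clause unpacks measure_time(func, iteration_number) into (times, averagetime, mintime); the helper itself lives elsewhere in the benchmark script. A plausible sketch inferred from that call signature (an assumption, not the original implementation):

import time

def measure_time(func, iteration_number):
    # Run func repeatedly, recording per-run wall-clock times,
    # then report the runs, their average, and the fastest one.
    times = []
    for _ in range(iteration_number):
        start = time.time()
        func()
        times.append(time.time() - start)
    averagetime = sum(times) / len(times)
    mintime = min(times)
    return times, averagetime, mintime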