本文整理汇总了Python中nltk.ConditionalFreqDist方法的典型用法代码示例。如果您正苦于以下问题:Python nltk.ConditionalFreqDist方法的具体用法?Python nltk.ConditionalFreqDist怎么用?Python nltk.ConditionalFreqDist使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块nltk的用法示例。
在下文中一共展示了nltk.ConditionalFreqDist方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_word_associations
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def build_word_associations():
    """Count which words follow each word in the Brown corpus.

    Every tagged sentence of the Brown corpus is lower-cased and stripped of
    English stop words. Each remaining token is then paired with the tokens
    in the slice ``[index + 1:index + 5]`` of the filtered sentence (at most
    four following tokens), keeping only those whose tag begins with the
    same character as the token's own tag.

    Returns:
        An ``nltk.ConditionalFreqDist`` whose conditions are tokens and whose
        distributions count the matching tokens seen in the window after
        each condition token.
    """
    cfd = nltk.ConditionalFreqDist()
    # A set gives constant-time membership tests; the plain list returned by
    # the corpus reader would be scanned linearly for every token.
    stopwords = set(nltk.corpus.stopwords.words('english'))
    for tagged_sentence in nltk.corpus.brown.tagged_sents():
        # Stop words are removed once here, so the loops below do not need
        # to test for them again.
        sentence = [
            (token.lower(), tag)
            for (token, tag) in tagged_sentence
            if token.lower() not in stopwords
        ]
        for index, (token, tag) in enumerate(sentence):
            for window_token, window_tag in sentence[index + 1:index + 5]:
                # Compare by value: ``is`` tests object identity and only
                # appeared to work because of string interning.
                if window_tag[0] == tag[0]:
                    # Item increment replaces FreqDist.inc(), which newer
                    # NLTK releases no longer provide.
                    cfd[token][window_token] += 1
    return cfd
示例2: word_sense_cdf
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def word_sense_cdf(word, context, wn_pos):
    """Pick the sense of *word* whose definition overlaps *context* the most.

    Word sense disambiguation in terms of matching word frequency between
    the context and each sense's definition. Adapted from
    www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    Args:
        word: the word to look up with ``wordnet.synsets``.
        context: container tested with ``in`` against every
            whitespace-separated word of each sense definition.
        wn_pos: part-of-speech argument forwarded to ``wordnet.synsets``.

    Returns:
        The sense with the largest number of definition words found in
        *context*; on a tie the earliest sense wins. ``None`` when
        ``wordnet.synsets`` returns no senses.
    """
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None
    cfd = nltk.ConditionalFreqDist(
        (sense, def_word)
        for sense in senses
        for def_word in sense.definition().split()
        if def_word in context
    )
    # Rank senses by N(), the total number of matching definition words.
    # FreqDist.max() returns the most frequent *sample* (a word), so the old
    # comparison ordered senses alphabetically, and it raised on senses with
    # no overlap -- an error the bare ``except`` silently discarded.
    # max() returns the first maximal item, so ties keep the earliest sense.
    return max(senses, key=lambda sense: cfd[sense].N())
示例3: test_increment
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_increment(self):
    # A ConditionalFreqDist must remain mutable through plain item access.
    sample_text = "cow cat mouse cat tiger"
    cfd = ConditionalFreqDist()

    # Use each token's length as its condition.
    for token in tokenize.word_tokenize(sample_text):
        cfd[len(token)][token] += 1
    self.assertEqual(cfd.conditions(), [3, 5])

    # Incrementing under a condition that was never seen must create it.
    cfd[2]['hi'] += 1
    expected_conditions = {3, 5, 2}
    self.assertEqual(set(cfd.conditions()), expected_conditions)
    # The unseen key started at 0, so one increment leaves it at 1.
    self.assertEqual(cfd[2]['hi'], 1)
示例4: train_markov_model_from_constraint_matrix
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path,
                                              delim="\t"):
    """Train tag frequency/probability tables from a count matrix file.

    The file at *csv_path* is read line by line and split on *delim*. The
    first line is the header: its cells after the first name the range
    states. In every following row the first cell is the domain and each
    remaining cell is an integer count; a positive count ``n`` in column
    ``i`` contributes ``n`` ``(domain, range_states[i])`` pairs. Blank cells
    are skipped.

    Side effects:
        * sets ``self.cfd_tags`` (``nltk.ConditionalFreqDist`` of the pairs)
          and pickles it to *mm_path*;
        * sets ``self.cpd_tags`` (``nltk.ConditionalProbDist`` built with
          ``nltk.MLEProbDist``);
        * sets ``self.tag_set`` to every condition and outcome of
          ``self.cfd_tags``;
        * prints the tables and finally calls ``self.viterbi_init()``.

    Args:
        csv_path: path of the delimited count matrix to read.
        mm_path: path the pickled frequency distribution is written to.
        delim: cell delimiter, a tab by default.

    Raises:
        IndexError: if the file is empty (no header line).
        ValueError: if a non-blank cell is not an integer.
    """
    # Strip the line terminator before splitting; otherwise the last header
    # cell keeps its newline and the final range state is misnamed.
    with open(csv_path) as csv_file:
        table = [line.rstrip("\r\n").split(delim) for line in csv_file]
    range_states = table.pop(0)[1:]

    tags = []
    for row in table:
        domain = row[0]
        for i, cell in enumerate(row[1:]):
            cell = cell.replace(" ", "")
            if cell == '':
                continue
            count = int(cell)
            if count > 0:
                tags.extend([(domain, range_states[i])] * count)

    self.cfd_tags = nltk.ConditionalFreqDist(tags)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("cfd trained, counts:")
    self.cfd_tags.tabulate()
    print("test:")
    print(tabulate_cfd(self.cfd_tags))

    # Save this new cfd for later use; the context manager closes the file
    # even if pickling fails.
    with open(mm_path, "wb") as mm_file:
        pickle.dump(self.cfd_tags, mm_file)

    # Initialize the cpd.
    self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags,
                                             nltk.MLEProbDist)
    print(tabulate_cfd(self.cpd_tags))

    # Collect conditions and outcomes without ``keys() + list``, which only
    # works where keys() returns a list.
    tag_set = set(self.cfd_tags.keys())
    for freq_dist in self.cfd_tags.values():
        tag_set.update(freq_dist.keys())
    self.tag_set = tag_set

    self.viterbi_init()  # initialize viterbi
示例5: test_tabulate
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_tabulate(self):
    # Tabulating with an unknown condition must not add that condition to
    # the distribution as a side effect.
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        empty.tabulate(conditions="BUG")  # nonexistent keys shouldn't be added
    except Exception:
        # A failure of tabulate() itself is tolerated: only its effect on
        # conditions() is under test. ``Exception`` (not a bare ``except``)
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])
示例6: test_plot
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_plot(self):
    # Plotting with an unknown condition must not add that condition to the
    # distribution as a side effect.
    empty = ConditionalFreqDist()
    self.assertEqual(empty.conditions(), [])
    try:
        empty.plot(conditions=["BUG"])  # nonexistent keys shouldn't be added
    except Exception:
        # A failure of plot() itself is tolerated: only its effect on
        # conditions() is under test. ``Exception`` (not a bare ``except``)
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    self.assertEqual(empty.conditions(), [])