This article collects typical usage examples of the ConditionalFreqDist.keys method from the Python module nltk.probability. If you are unsure what ConditionalFreqDist.keys does, how to call it, or what it looks like in practice, the hand-picked code examples below should help. You can also read further about the class this method belongs to, nltk.probability.ConditionalFreqDist.
Below are 4 code examples of ConditionalFreqDist.keys, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
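Before the harvested examples, here is a minimal self-contained sketch of what the method returns: ConditionalFreqDist.keys() yields the conditions of the distribution (ConditionalFreqDist subclasses dict in NLTK 3), and each cfd[condition] is a FreqDist. The toy word list is made up for illustration:

from nltk.probability import ConditionalFreqDist

# condition = word length, sample = the word itself
pairs = [(len(w), w) for w in ['the', 'cat', 'sat', 'on', 'a', 'mat']]
cfd = ConditionalFreqDist(pairs)
print(list(cfd.keys()))       # the conditions: [3, 2, 1]
print(cfd[3].most_common())   # FreqDist over the 3-letter words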
Example 1: words_by_followers
# Required import: from nltk.probability import ConditionalFreqDist [as alias]
# Or: from nltk.probability.ConditionalFreqDist import keys [as alias]
def words_by_followers(category):
    """Given a category from the Brown corpus, lowercase everything
    and return a frequency distribution where the keys are words and
    the counts are the number of different contexts each word can appear in."""
    bigrams = brown_bigrams(category)  # helper defined elsewhere in the source project
    # Condition on the second word of each bigram, so cfdist[word] collects
    # every distinct word that precedes it.
    cfdist = ConditionalFreqDist((bigram[1], bigram[0]) for bigram in bigrams)
    fdist = FreqDist()
    for context in cfdist.keys():
        fdist[context] = len(cfdist[context])  # number of distinct predecessors
    return fdist
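The snippet depends on a brown_bigrams helper that the extractor did not capture. A plausible stand-in, assuming the helper simply lowercases the tokens of the given Brown category and pairs them into bigrams (the name and behavior here are assumptions, not part of the original project):

from nltk import bigrams
from nltk.corpus import brown
from nltk.probability import ConditionalFreqDist, FreqDist

def brown_bigrams(category):
    # hypothetical helper: lowercased bigrams from one Brown category
    words = (w.lower() for w in brown.words(categories=category))
    return list(bigrams(words))

fdist = words_by_followers('news')
print(fdist.most_common(5))  # words with the most distinct predecessors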
Example 2: is_noun
# Required import: from nltk.probability import ConditionalFreqDist [as alias]
# Or: from nltk.probability.ConditionalFreqDist import keys [as alias]
from nltk.corpus import brown, stopwords
from nltk.probability import ConditionalFreqDist

### define a function that returns True if the input tag is some form of noun
def is_noun(tag):
    return tag.lower() in ['nn', 'nns', 'nn$', 'nn-tl', 'nn+bez',
                           'nn+hvz', 'nns$', 'np', 'np$', 'np+bez', 'nps',
                           'nps$', 'nr', 'np-tl', 'nrs', 'nr$']
### count nouns that occur within a window of up to 9 tokens after another noun
stopwords_list = stopwords.words('english')
cfd = ConditionalFreqDist()
for sentence in brown.tagged_sents():
    for (index, tagtuple) in enumerate(sentence):
        (token, tag) = tagtuple
        token = token.lower()
        if token not in stopwords_list and is_noun(tag):
            window = sentence[index + 1: index + 10]
            for (window_token, window_tag) in window:
                window_token = window_token.lower()
                if window_token not in stopwords_list and is_noun(window_tag):
                    cfd[token][window_token] += 1  # NLTK 3 replacement for the old inc()
print('Done counting')
print(list(cfd.keys()))
print('-' * 100)
print(cfd['bread'])
print(cfd['man'])
print(cfd['man'].max())
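A quick follow-up on the method this page documents: because ConditionalFreqDist subclasses dict in NLTK 3, cfd.keys() yields the same conditions as the documented cfd.conditions() accessor. A minimal check against the cfd built above:

assert set(cfd.keys()) == set(cfd.conditions())
print(cfd['man'].most_common(10))  # the ten nouns most often following 'man'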
Example 3: processFile
# Required import: from nltk.probability import ConditionalFreqDist [as alias]
# Or: from nltk.probability.ConditionalFreqDist import keys [as alias]
# (the snippet begins mid-condition, inside processFile's per-token loop)
                not word.startswith('#') and
                not word.startswith('http')):
            stem = s.stem(word.lower())
        else:
            stem = word.lower()
        if len(stem) == 1 and not stem.isalnum():
            continue
        if stem in s.stopwords:
            continue
        if stem != '':
            fd[stem] += 1
    return fd
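The extractor dropped the head of processFile, so the fragment above is not runnable on its own. Here is one hypothetical reconstruction; the stemmer wrapper s, the whitespace tokenization, and the first clause of the truncated condition are all assumptions rather than the original code:

import nltk
from nltk.probability import FreqDist
from nltk.stem.snowball import EnglishStemmer

class StemmerWithStopwords(object):
    # hypothetical stand-in for the snippet's `s`: a stemmer plus a stopword set
    def __init__(self):
        self._stemmer = EnglishStemmer()
        self.stopwords = set(nltk.corpus.stopwords.words('english'))

    def stem(self, word):
        return self._stemmer.stem(word)

s = StemmerWithStopwords()

def processFile(path):
    """Count stemmed tokens in one tweet file (a reconstruction, not the original)."""
    fd = FreqDist()
    with open(path) as f:
        for word in f.read().split():
            # assumed head of the truncated condition: also skip @-mentions
            if (not word.startswith('@') and
                    not word.startswith('#') and
                    not word.startswith('http')):
                stem = s.stem(word.lower())
            else:
                stem = word.lower()
            if len(stem) == 1 and not stem.isalnum():
                continue
            if stem in s.stopwords:
                continue
            if stem != '':
                fd[stem] += 1
    return fd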
# In[123]:
import os
from joblib import Parallel, delayed  # Parallel/delayed come from joblib
from nltk.probability import ConditionalFreqDist

cfd = ConditionalFreqDist()
for tag in [u'#nbafinals2015', u'#nbafinals2015_#warriors', u'#warriors']:
    words = {}  # unused in this excerpt
    for root, dirs, files in os.walk(u'tweets/' + tag):
        # stem-count each tweet file in parallel, then merge into the per-tag FreqDist
        for fd in Parallel(n_jobs=8)(delayed(processFile)(os.path.join(root, filename)) for filename in files):
            cfd[tag].update(fd)
    cfd['all'].update(cfd[tag])
# In[170]:
for tag in sorted(cfd.keys()):
    cfd[tag].plot(25, title=tag)
Example 4: main
# Required import: from nltk.probability import ConditionalFreqDist [as alias]
# Or: from nltk.probability.ConditionalFreqDist import keys [as alias]
def main(db, pwset_id, dryrun, verbose, basepath, tag_type):
    # tags_file = open('grammar/debug.txt', 'w+')
    patterns_dist = FreqDist()             # distribution of patterns
    segments_dist = ConditionalFreqDist()  # distribution of segments, grouped by semantic tag
    counter = 0
    while db.hasNext():
        segments = db.nextPwd()
        password = ''.join([s.word for s in segments])
        tags = []
        segments = expand_gaps(segments)
        for s in segments:  # assign a semantic tag to each segment
            if tag_type == 'pos':
                tag = classify_by_pos(s)
            elif tag_type == 'backoff':
                tag = classify_semantic_backoff_pos(s)
            elif tag_type == 'word':
                tag = classify_word(s)
            else:
                tag = classify_pos_semantic(s)
            tags.append(tag)
            segments_dist[tag][s.word] += 1
        pattern = stringify_pattern(tags)
        patterns_dist[pattern] += 1
        # outputs the classification results for debugging purposes
        if verbose:
            print_result(password, segments, tags, pattern)
        counter += 1
        if counter % 100000 == 0:
            print("{} passwords processed so far ({:.2%})... ".format(counter, float(counter) / db.sets_size))
    # tags_file.close()
    pwset_id = str(pwset_id)
    if dryrun:
        return
    # remove the previous grammar
    try:
        shutil.rmtree(basepath)
    except OSError:  # in case the above folder does not exist
        pass
    # recreate the folders empty
    os.makedirs(os.path.join(basepath, 'nonterminals'))
    with open(os.path.join(basepath, 'rules.txt'), 'w+') as f:
        total = patterns_dist.N()
        for pattern, freq in patterns_dist.most_common():
            f.write('{}\t{}\n'.format(pattern, float(freq) / total))
    for tag in segments_dist.keys():
        total = segments_dist[tag].N()
        with open(os.path.join(basepath, 'nonterminals', str(tag) + '.txt'), 'w+') as f:
            for k, v in segments_dist[tag].most_common():
                f.write("{}\t{}\n".format(k, float(v) / total))
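Example 4 relies on several project-specific helpers (db, expand_gaps, the classify_* functions, stringify_pattern, print_result) that this page does not include. Judging from how the output files are written, rules.txt maps each structural pattern to its relative frequency, and each nonterminals/<tag>.txt maps the words under one tag to theirs. As a rough, hypothetical illustration of the pattern key alone, stringify_pattern might flatten the tag list like this:

def stringify_pattern(tags):
    # hypothetical: ['noun', 'number'] -> '(noun)(number)', one hashable key per structure
    return ''.join('({})'.format(t) for t in tags)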