当前位置: 首页>>代码示例>>Python>>正文


Python ConditionalFreqDist.keys方法代码示例

本文整理汇总了Python中nltk.probability.ConditionalFreqDist.keys方法的典型用法代码示例。如果您正苦于以下问题:Python ConditionalFreqDist.keys方法的具体用法?Python ConditionalFreqDist.keys怎么用?Python ConditionalFreqDist.keys使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.probability.ConditionalFreqDist的用法示例。


在下文中一共展示了ConditionalFreqDist.keys方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: words_by_followers

# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import keys [as 别名]
def words_by_followers(category):
    """Return a FreqDist over the lowercased words of a Brown-corpus
    category, where each word's count is the number of *distinct* words
    that can immediately precede it (its number of left-contexts).
    """
    # Condition on the second element of each bigram so that
    # by_left_context[word] accumulates every word seen right before it.
    pairs = ((second, first) for (first, second) in brown_bigrams(category))
    by_left_context = ConditionalFreqDist(pairs)
    counts = FreqDist()
    for word in by_left_context:
        counts[word] = len(by_left_context[word])
    return counts
开发者ID:slee17,项目名称:NLP,代码行数:12,代码来源:languageModel.py

示例2: is_noun

# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import keys [as 别名]
### define a function that returns True if the input tag is some form of noun
def is_noun(tag):
    """Return True if *tag* (case-insensitive) is a Brown-corpus noun tag.

    Bug fixes vs. the original:
    - ``tag.lower`` was compared without calling it, so the bound method
      object was tested for membership and the function always returned
      False; it now calls ``tag.lower()``.
    - ``'mp+bez'`` was a typo for the Brown tag ``'np+bez'``
      (proper noun + "is").
    """
    return tag.lower() in {'nn', 'nns', 'nn$', 'nn-tl', 'nn+bez',
                           'nn+hvz', 'nns$', 'np', 'np$', 'np+bez', 'nps',
                           'nps$', 'nr', 'np-tl', 'nrs', 'nr$'}


### count nouns that occure whithin a window of size 10 ahead of other nouns
for sentence in brown.tagged_sents():
	for (index, tagtuple) in enumerate(sentence):
		(token, tag) = tagtuple
		token = token.lower()
		if token not in stopwords_list and is_noun(tag):
			window = sentence[index + 1: index + 10]
			for (window_token, window_tag) in window:
				window_token = window_token.lower()
				if window_token not in stopwords_list and is_noun(window_tag):
					cfd[token].inc(window_token)
					print 'Irasiau'

print cfd.keys()
print '-' * 100
print cfd['bread']
print cfd['man']
print cfd['man'].max




开发者ID:Migisa,项目名称:Misc,代码行数:27,代码来源:corpusExplore.py

示例3: len

# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import keys [as 别名]
                    not word.startswith('#') and
                    not word.startswith('http')):
                    stem = s.stem(word.lower())
                else:
                    stem = word.lower()
                if len(stem) == 1 and not stem.isalnum():
                    continue
                if stem in s.stopwords:
                    continue
                if stem != '':
                    fd[stem] += 1
    return fd


# In[123]:

# For each hashtag, walk its downloaded tweet files, build a frequency
# distribution per file in parallel (8 worker processes), and merge them
# into that hashtag's bucket of the conditional frequency distribution.
# Fix vs. original: removed the dead local `words = {}`, which was never
# read or written after initialization.
for tag in [u'#nbafinals2015', u'#nbafinals2015_#warriors', u'#warriors']:
    for root, dirnames, filenames in os.walk(u'tweets/' + tag):
        fdists = Parallel(n_jobs=8)(
            delayed(processFile)(os.path.join(root, filename))
            for filename in filenames)
        for fd in fdists:
            cfd[tag].update(fd)
    # fold this hashtag's counts into the combined 'all' distribution
    cfd['all'].update(cfd[tag])
        


# In[170]:

# Render a top-25 frequency plot for every bucket, in sorted key order.
for label in sorted(cfd):
    cfd[label].plot(25, title=label)

开发者ID:nkrishnaswami,项目名称:mids-w205-hw2,代码行数:31,代码来源:proc.py

示例4: main

# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import keys [as 别名]
def main(db, pwset_id, dryrun, verbose, basepath, tag_type):
    """Stream segmented passwords from *db*, tag every segment, and write
    a probabilistic grammar (pattern rules + per-tag word lists) under
    *basepath*.

    db       -- source exposing hasNext()/nextPwd()/sets_size; nextPwd()
                yields a list of segment objects with a .word attribute
    pwset_id -- password-set identifier (only stringified in visible code)
    dryrun   -- when True, compute distributions but write nothing
    verbose  -- when True, print each password's classification
    basepath -- output directory; wiped and recreated on every run
    tag_type -- 'pos', 'backoff', or 'word' select specific classifiers;
                any other value falls back to classify_pos_semantic
    """
#    tags_file = open('grammar/debug.txt', 'w+')
    
    patterns_dist = FreqDist()  # distribution of patterns
    segments_dist = ConditionalFreqDist()  # distribution of segments, grouped by semantic tag
    
    counter = 0  # passwords processed so far, for progress reporting
    
    while db.hasNext():
        segments = db.nextPwd()
        password = ''.join([s.word for s in segments])
        tags = []

        # presumably fills unclassified spans between segments so every
        # character is covered — TODO confirm (expand_gaps defined elsewhere)
        segments = expand_gaps(segments)
        
        for s in segments:  # semantic tags
            if tag_type == 'pos':
                tag = classify_by_pos(s)
            elif tag_type == 'backoff':
                tag = classify_semantic_backoff_pos(s)
            elif tag_type == 'word':
                tag = classify_word(s)
            else:
                # default: combined POS + semantic classification
                tag = classify_pos_semantic(s)

            tags.append(tag)
            # count this word under its tag's frequency distribution
            segments_dist[tag][s.word] += 1
            
        # one pattern key per password, derived from its tag sequence
        pattern = stringify_pattern(tags)
        
        patterns_dist[pattern] += 1
        
        # outputs the classification results for debugging purposes
        if verbose:
            print_result(password, segments, tags, pattern)

        counter += 1
        # progress line every 100k passwords (Python 2 print statement)
        if counter % 100000 == 0:
            print "{} passwords processed so far ({:.2%})... ".format(counter, float(counter)/db.sets_size)
         
#     tags_file.close()

    pwset_id = str(pwset_id)
    
    # distributions are fully built; in a dry run we stop before touching disk
    if dryrun:
        return

    # remove previous grammar
    try:
        shutil.rmtree(basepath)
    except OSError: # in case the above folder does not exist 
        pass
    
    # recreate the folders empty
    os.makedirs(os.path.join(basepath, 'nonterminals'))

    # rules.txt: one "pattern<TAB>relative frequency" line per pattern,
    # most frequent first
    with open(os.path.join(basepath, 'rules.txt'), 'w+') as f:
        total = patterns_dist.N()
        for pattern, freq in patterns_dist.most_common():
            f.write('{}\t{}\n'.format(pattern, float(freq)/total))
    
    # nonterminals/<tag>.txt: one "word<TAB>probability" line per word,
    # normalized within that tag's distribution
    for tag in segments_dist.keys():
        total = segments_dist[tag].N()
        with open(os.path.join(basepath, 'nonterminals', str(tag) + '.txt'), 'w+') as f:
            for k, v in segments_dist[tag].most_common():
                f.write("{}\t{}\n".format(k, float(v)/total))
开发者ID:,项目名称:,代码行数:68,代码来源:


注:本文中的nltk.probability.ConditionalFreqDist.keys方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。