当前位置: 首页>>代码示例>>Python>>正文


Python nltk.ConditionalFreqDist类代码示例

本文整理汇总了Python中nltk.ConditionalFreqDist类的典型用法代码示例。如果您正苦于以下问题：Python nltk.ConditionalFreqDist类的具体用法？Python nltk.ConditionalFreqDist怎么用？Python nltk.ConditionalFreqDist使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块nltk的用法示例。


在下文中一共展示了nltk.ConditionalFreqDist类的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build_word_associations

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def build_word_associations():
    """Count which words tend to follow each word in the Brown corpus.

    Each tagged sentence is lower-cased and stripped of English stop words.
    For every remaining token, the (up to) four tokens that follow it in the
    filtered sentence are inspected, and a following token is counted when
    the first character of its tag equals the first character of the current
    token's tag.

    Returns:
        nltk.ConditionalFreqDist: conditions are lower-cased tokens; the
        samples under a condition are the lower-cased tokens counted after
        it, with their counts.
    """
    cfd = nltk.ConditionalFreqDist()

    # A frozenset gives O(1) membership tests; the list returned by the
    # corpus reader would be scanned linearly for every single token.
    stopwords = frozenset(nltk.corpus.stopwords.words('english'))

    for sentence in nltk.corpus.brown.tagged_sents():
        # Lower-case once and drop stop words up front, so the loops below
        # never see a stop word and need no further filtering.
        lowered = ((token.lower(), tag) for (token, tag) in sentence)
        filtered = [(token, tag) for (token, tag) in lowered
                    if token not in stopwords]

        for index, (token, tag) in enumerate(filtered):
            for window_token, window_tag in filtered[index + 1:index + 5]:
                # Compare by value: `is` on strings only succeeds by accident
                # of interning and is not guaranteed by the language.
                if window_tag[0] == tag[0]:
                    # FreqDist.inc() no longer exists in NLTK 3; FreqDist is
                    # a Counter, so increment through item assignment.
                    cfd[token][window_token] += 1
    return cfd
开发者ID:nltk,项目名称:nltk_teach,代码行数:18,代码来源:categories.py

示例2: word_sense_cdf

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def word_sense_cdf(word, context, wn_pos):
    """Pick the WordNet sense whose definition overlaps most with a context.

    Word sense disambiguation in terms of matching word frequency between
    the context and each sense's definition. Adapted from
    www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    Args:
        word: the word to disambiguate, passed to ``wordnet.synsets``.
        context: container tested with ``in`` for every whitespace-separated
            word of each definition (NOTE(review): if this is a plain string
            the test is a substring match - confirm against callers).
        wn_pos: the WordNet part-of-speech passed to ``wordnet.synsets``.

    Returns:
        The sense with the largest number of definition words found in
        ``context``; ties (including "no overlap at all") go to the sense
        listed first. ``None`` when the word has no senses.
    """
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None

    cfd = nltk.ConditionalFreqDist(
        (sense, def_word)
        for sense in senses
        for def_word in sense.definition().split()
        if def_word in context)

    # Rank by the overlap count N(). FreqDist.max() returns the most frequent
    # *sample* (a word), so comparing max() values ordered senses
    # alphabetically, and it raises on an empty distribution - which the old
    # bare `except` silently hid. A sense with no overlap simply has N() == 0.
    # max() keeps the first maximal element, preserving the earlier-sense
    # tie-break.
    return max(senses, key=lambda sense: cfd[sense].N())
开发者ID:stathius,项目名称:yenlp,代码行数:22,代码来源:sentiwordnet.py

示例3: test_increment

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_increment(self):
        """A ConditionalFreqDist can still be mutated through item access."""
        sample = "cow cat mouse cat tiger"
        dist = ConditionalFreqDist()

        # Use each token's length as its condition.
        for token in tokenize.word_tokenize(sample):
            dist[len(token)][token] += 1

        self.assertEqual(dist.conditions(), [3, 5])

        # Incrementing under a condition that has not been seen yet must
        # create that condition and start the sample's count from zero.
        dist[2]['hi'] += 1
        observed = set(dist.conditions())
        self.assertEqual(observed, {3, 5, 2})
        self.assertEqual(dist[2]['hi'], 1)
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:18,代码来源:test_cfd_mutation.py

示例4: train_markov_model_from_constraint_matrix

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path,
                                                  delim="\t"):
        """Build the tag transition model from a count matrix file.

        The first row of the file names the range states (its first cell is
        ignored). Every later row starts with a domain state followed by one
        integer cell per range state; each (domain, range) pair is added as
        many times as its cell says. Blank cells are skipped.

        Side effects: sets ``self.cfd_tags``, ``self.cpd_tags`` and
        ``self.tag_set``, prints the tables, pickles the frequency
        distribution to ``mm_path`` and calls ``self.viterbi_init()``.

        Args:
            csv_path: path of the delimited count matrix to read.
            mm_path: path the pickled ConditionalFreqDist is written to.
            delim: cell delimiter, tab by default.
        """
        # Close the file deterministically, and strip the line ending before
        # splitting so the last header cell does not keep a trailing "\n".
        with open(csv_path) as csv_file:
            table = [line.rstrip("\r\n").split(delim) for line in csv_file]

        range_states = table.pop(0)[1:]
        tags = []
        for row in table:
            domain = row[0]
            for i, cell in enumerate(row[1:]):
                count = cell.replace(" ", "")
                if count == '':
                    continue
                # A zero or negative count repeats the pair zero times.
                tags.extend([(domain, range_states[i])] * int(count))

        self.cfd_tags = nltk.ConditionalFreqDist(tags)
        print("cfd trained, counts:")
        self.cfd_tags.tabulate()
        print("test:")
        print(tabulate_cfd(self.cfd_tags))

        # save this new cfd for later use
        with open(mm_path, "wb") as mm_file:
            pickle.dump(self.cfd_tags, mm_file)

        # initialize the cpd
        self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags,
                                                 nltk.MLEProbDist)
        print(tabulate_cfd(self.cpd_tags))

        # Every state seen as a condition or as an outcome. Built with set
        # updates because dict views cannot be concatenated with `+` on
        # Python 3.
        self.tag_set = set(self.cfd_tags.keys())
        for outcomes in self.cfd_tags.values():
            self.tag_set.update(outcomes.keys())
        self.viterbi_init()  # initialize viterbi
开发者ID:clp-research,项目名称:deep_disfluency,代码行数:33,代码来源:hmm.py

示例5: test_tabulate

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_tabulate(self):
        """tabulate() must not add the conditions it is asked to display."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(), [])
        try:
            empty.tabulate(conditions="BUG")  # nonexistent keys shouldn't be added
        except Exception:
            # Whether tabulate() succeeds is irrelevant here; only the state
            # afterwards is checked. `except Exception` keeps that best-effort
            # intent without also swallowing KeyboardInterrupt / SystemExit.
            pass
        self.assertEqual(empty.conditions(), [])
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:10,代码来源:test_cfd_mutation.py

示例6: test_plot

# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import ConditionalFreqDist [as 别名]
def test_plot(self):
        """plot() must not add the conditions it is asked to display."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(), [])
        try:
            empty.plot(conditions=["BUG"])  # nonexistent keys shouldn't be added
        except Exception:
            # Whether plot() succeeds is irrelevant here; only the state
            # afterwards is checked. `except Exception` keeps that best-effort
            # intent without also swallowing KeyboardInterrupt / SystemExit.
            pass
        self.assertEqual(empty.conditions(), [])
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:10,代码来源:test_cfd_mutation.py


注:本文中的nltk.ConditionalFreqDist方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。