當前位置: 首頁>>代碼示例>>Python>>正文


Python nltk.ConditionalFreqDist方法代碼示例

本文整理匯總了Python中nltk.ConditionalFreqDist方法的典型用法代碼示例。如果您正苦於以下問題:Python nltk.ConditionalFreqDist方法的具體用法?Python nltk.ConditionalFreqDist怎麼用?Python nltk.ConditionalFreqDist使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk的用法示例。


在下文中一共展示了nltk.ConditionalFreqDist方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: build_word_associations

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def build_word_associations():
    """Count, for each Brown corpus word, the nearby words of a similar tag.

    Every tagged sentence of the Brown corpus is lower-cased and stripped of
    English stop words.  Each remaining token is then paired with the (up to)
    four tokens that follow it in the filtered sentence, and a pair is
    counted when the first characters of the two tags are equal.

    Returns:
        nltk.ConditionalFreqDist: conditions are tokens; the samples of each
        condition are the following tokens that were counted for it.
    """
    cfd = nltk.ConditionalFreqDist()

    # A set gives O(1) membership tests instead of scanning the list per token.
    stopwords = set(nltk.corpus.stopwords.words('english'))

    for tagged_sent in nltk.corpus.brown.tagged_sents():
        # Stop words are removed here, so the loops below need no re-check.
        sentence = [(token.lower(), tag) for (token, tag) in tagged_sent
                    if token.lower() not in stopwords]
        for index, (token, tag) in enumerate(sentence):
            # Look ahead at the next four surviving tokens.
            for window_token, window_tag in sentence[index + 1:index + 5]:
                # Compare by value; `is` on strings only works when the
                # interpreter happens to share the two string objects.
                if window_tag[0] == tag[0]:
                    # FreqDist.inc() no longer exists in NLTK 3; FreqDist is
                    # a Counter, so increment the sample directly.
                    cfd[token][window_token] += 1
    return cfd 
開發者ID:nltk,項目名稱:nltk_teach,代碼行數:18,代碼來源:categories.py

示例2: word_sense_cdf

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def word_sense_cdf(word, context, wn_pos):
    """Pick the sense of ``word`` whose definition best overlaps ``context``.

    Word sense disambiguation by counting how many words of each sense's
    definition also occur in the context. Adapted from
    www.slideshare.net/faigg/tutotial-of-sentiment-analysis

    Args:
        word: the word to disambiguate; passed to ``wordnet.synsets``.
        context: container tested with ``in`` for each definition word.
        wn_pos: part-of-speech argument passed to ``wordnet.synsets``.

    Returns:
        The sense with the largest number of definition words found in
        ``context`` (the earliest sense wins ties, including the case of no
        overlap at all), or ``None`` when ``word`` has no senses.
    """
    senses = wordnet.synsets(word, wn_pos)
    if not senses:
        return None

    cfd = nltk.ConditionalFreqDist(
        (sense, def_word)
        for sense in senses
        for def_word in sense.definition().split()
        if def_word in context)

    # Rank by N(), the total count of matching words.  FreqDist.max() returns
    # the most frequent *sample* (a word) and raises on an empty distribution,
    # so comparing max() values compared strings and, behind a bare except,
    # kept the first sense whenever it had no overlap.
    # max() returns the first maximal item, so ties keep the earliest sense.
    return max(senses, key=lambda sense: cfd[sense].N())
開發者ID:stathius,項目名稱:yenlp,代碼行數:22,代碼來源:sentiwordnet.py

示例3: test_increment

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def test_increment(self):
        """Check that a ConditionalFreqDist can still be mutated by indexing."""
        sample = "cow cat mouse cat tiger"
        dist = ConditionalFreqDist()

        # Bucket every token under its length, which acts as the condition.
        for token in tokenize.word_tokenize(sample):
            dist[len(token)][token] += 1

        self.assertEqual(dist.conditions(), [3,5])

        # A condition/sample pair never seen before can be incremented too.
        dist[2]['hi'] += 1
        # The new condition is now present ...
        self.assertEqual(set(dist.conditions()), {3, 5, 2})
        # ... and its sample went from 0 (unseen) to 1.
        self.assertEqual(dist[2]['hi'], 1)
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:18,代碼來源:test_cfd_mutation.py

示例4: train_markov_model_from_constraint_matrix

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def train_markov_model_from_constraint_matrix(self, csv_path, mm_path,
                                                  delim="\t"):
        """Train the tag transition model from a table of integer counts.

        The first row of the table names the range states (its first cell is
        ignored); in every other row the first cell is the domain state and
        each following cell is an integer count for the (domain, range state)
        pair of that column.  Blank cells and non-positive counts add nothing.

        Sets ``self.cfd_tags``, ``self.cpd_tags`` and ``self.tag_set``, prints
        the trained tables, pickles ``self.cfd_tags`` to ``mm_path`` and
        finally calls ``self.viterbi_init()``.

        Args:
            csv_path: path of the delimited count table to read.
            mm_path: path the pickled ConditionalFreqDist is written to.
            delim: cell separator used in the table.
        """
        # Strip the line ending before splitting; otherwise the last header
        # cell keeps its newline and the last range state is named "x\n".
        with open(csv_path) as csv_file:
            table = [line.rstrip("\r\n").split(delim) for line in csv_file]
        range_states = table.pop(0)[1:]

        tags = []
        for row in table:
            domain = row[0]
            for i, cell in enumerate(row[1:]):
                s = cell.replace(" ", "")
                if s == '':
                    continue
                count = int(s)
                if count > 0:
                    # One (domain, range) observation per unit of the count.
                    tags.extend([(domain, range_states[i])] * count)

        self.cfd_tags = nltk.ConditionalFreqDist(tags)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("cfd trained, counts:")
        self.cfd_tags.tabulate()
        print("test:")
        print(tabulate_cfd(self.cfd_tags))
        # save this new cfd for later use
        with open(mm_path, "wb") as mm_file:
            pickle.dump(self.cfd_tags, mm_file)
        # initialize the cpd
        self.cpd_tags = nltk.ConditionalProbDist(self.cfd_tags,
                                                 nltk.MLEProbDist)
        print(tabulate_cfd(self.cpd_tags))
        # The tag set is every condition plus every outcome seen under it.
        # Built with set.update because keys() + list fails on Python 3.
        tag_set = set(self.cfd_tags.keys())
        for freq_dist in self.cfd_tags.values():
            tag_set.update(freq_dist.keys())
        self.tag_set = tag_set
        self.viterbi_init()  # initialize viterbi 
開發者ID:clp-research,項目名稱:deep_disfluency,代碼行數:33,代碼來源:hmm.py

示例5: test_tabulate

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def test_tabulate(self):
        """Check that tabulating a missing condition does not create it."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(),[])
        try:
            empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added
        except Exception:
            # Any failure of tabulate() itself is tolerated; only the state
            # afterwards is under test.  Catching Exception rather than using
            # a bare except lets KeyboardInterrupt/SystemExit propagate.
            pass
        self.assertEqual(empty.conditions(), []) 
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:10,代碼來源:test_cfd_mutation.py

示例6: test_plot

# 需要導入模塊: import nltk [as 別名]
# 或者: from nltk import ConditionalFreqDist [as 別名]
def test_plot(self):
        """Check that plotting a missing condition does not create it."""
        empty = ConditionalFreqDist()
        self.assertEqual(empty.conditions(),[])
        try:
            empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added
        except Exception:
            # Any failure of plot() itself is tolerated; only the state
            # afterwards is under test.  Catching Exception rather than using
            # a bare except lets KeyboardInterrupt/SystemExit propagate.
            pass
        self.assertEqual(empty.conditions(),[]) 
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:10,代碼來源:test_cfd_mutation.py


注:本文中的nltk.ConditionalFreqDist方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。