本文整理汇总了Python中nltk.probability.ConditionalFreqDist.items方法的典型用法代码示例。如果您正苦于以下问题:Python ConditionalFreqDist.items方法的具体用法?Python ConditionalFreqDist.items怎么用?Python ConditionalFreqDist.items使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.probability.ConditionalFreqDist的用法示例。
在下文中一共展示了ConditionalFreqDist.items方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: ContextIndex
# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import items [as 别名]
class ContextIndex(object):
    """
    A bidirectional index between words and their 'contexts' in a text.
    The context of a word is usually defined to be the words that occur
    in a fixed window around the word; but other definitions may also
    be used by providing a custom context function.
    """

    @staticmethod
    def _default_context(tokens, i):
        """One left token and one right token, normalized to lowercase.

        :param tokens: the token sequence being indexed
        :param i: position of the target word in ``tokens``
        :return: ``(left, right)`` pair, with ``'*START*'``/``'*END*'``
            sentinels at the sequence boundaries
        """
        left = tokens[i - 1].lower() if i != 0 else '*START*'
        right = tokens[i + 1].lower() if i != len(tokens) - 1 else '*END*'
        return (left, right)

    def __init__(self, tokens, context_func=None, filter=None, key=lambda x: x):
        """
        :param tokens: the document (a sequence of word tokens) to index
        :param context_func: optional callable ``(tokens, i) -> context``;
            defaults to :meth:`_default_context`
        :param filter: optional predicate; tokens failing it are excluded
            from the index (the full token list is still kept for
            :meth:`tokens`)
        :param key: normalization applied to each word before indexing
            (e.g. ``str.lower``)
        """
        self._key = key
        self._tokens = tokens
        if context_func:
            self._context_func = context_func
        else:
            self._context_func = self._default_context
        if filter:
            tokens = [t for t in tokens if filter(t)]
        # word -> contexts it appears in, and the reverse mapping.
        # Both store KEY-NORMALIZED words; callers must normalize too.
        self._word_to_contexts = CFD(
            (self._key(w), self._context_func(tokens, i)) for i, w in enumerate(tokens)
        )
        self._context_to_words = CFD(
            (self._context_func(tokens, i), self._key(w)) for i, w in enumerate(tokens)
        )

    def tokens(self):
        """
        :rtype: list(str)
        :return: The document that this context index was
            created from.
        """
        return self._tokens

    def word_similarity_dict(self, word):
        """
        Return a dictionary mapping from words to 'similarity scores,'
        indicating how often these two words occur in the same
        context.
        """
        word = self._key(word)
        word_contexts = set(self._word_to_contexts[word])
        scores = {}
        for w, w_contexts in self._word_to_contexts.items():
            scores[w] = f_measure(word_contexts, set(w_contexts))
        return scores

    def similar_words(self, word, n=20):
        """
        Return up to *n* words ranked by how strongly they share
        contexts with *word* (sum of co-occurrence count products).
        """
        # Bug fix: normalize the query word once. The index stores
        # key-normalized words, so comparing/looking up the raw word
        # mis-scored (or KeyError'd) whenever `key` is non-trivial.
        word = self._key(word)
        scores = defaultdict(int)
        for c in self._word_to_contexts[word]:
            for w in self._context_to_words[c]:
                if w != word:
                    scores[w] += (
                        self._context_to_words[c][word] * self._context_to_words[c][w]
                    )
        return sorted(scores, key=scores.get, reverse=True)[:n]

    def common_contexts(self, words, fail_on_unknown=False):
        """
        Find contexts where the specified words can all appear; and
        return a frequency distribution mapping each context to the
        number of times that context was used.

        :param words: The words used to seed the similarity search
        :type words: str
        :param fail_on_unknown: If true, then raise a value error if
            any of the given words do not occur at all in the index.
        """
        words = [self._key(w) for w in words]
        contexts = [set(self._word_to_contexts[w]) for w in words]
        empty = [words[i] for i in range(len(words)) if not contexts[i]]
        if empty and fail_on_unknown:
            # Bug fix: report only the words that were missing, not
            # every seed word.
            raise ValueError("The following word(s) were not found:", " ".join(empty))
        # Guard: reduce() over an empty sequence raises TypeError, so an
        # empty `words` list now yields an empty FreqDist instead.
        common = reduce(set.intersection, contexts) if contexts else set()
        if not common:
            # nothing in common -- just return an empty freqdist.
            return FreqDist()
        return FreqDist(
            c for w in words for c in self._word_to_contexts[w] if c in common
        )
示例2: FreqDist
# 需要导入模块: from nltk.probability import ConditionalFreqDist [as 别名]
# 或者: from nltk.probability.ConditionalFreqDist import items [as 别名]
from nltk.probability import FreqDist
from nltk.probability import ConditionalFreqDist

# Overall word frequencies, plus per-label ('pos'/'neg') conditional
# frequencies over the NLTK movie_reviews corpus.
word_fd = FreqDist()
label_word_fd = ConditionalFreqDist()

# Bug fix: the two variables were swapped -- testNegWords was loaded from
# categories=['pos'] and testPosWords from categories=['neg'], so every
# count was attributed to the wrong sentiment label.
testPosWords = movie_reviews.words(categories=['pos'])
testNegWords = movie_reviews.words(categories=['neg'])

for word in testNegWords:
    word_fd[word.lower()] += 1
    label_word_fd['neg'][word.lower()] += 1
for word in testPosWords:
    word_fd[word.lower()] += 1
    label_word_fd['pos'][word.lower()] += 1

print(word_fd.N(), word_fd.B(), word_fd.most_common(20))
print(label_word_fd.N(), label_word_fd.conditions(), label_word_fd.items())
print(label_word_fd['pos'].N(), label_word_fd['neg'].N())

# In[ ]:
# Contingency table for word/label association measures (e.g. chi-square):
# n_ii = label_word_fd[label][word]
# n_ix = word_fd[word]
# n_xi = label_word_fd[label].N()
# n_xx = label_word_fd.N()
#            w1     ~w1
#          ------ ------
#    w2   | n_ii | n_oi | = n_xi
#          ------ ------
#   ~w2   | n_io | n_oo |
#          ------ ------
开发者ID:alokkumary2j,项目名称:Sentiment-Analysis-Using-Python-NLTK,代码行数:33,代码来源:MaxEntSentimentAnalysis.py