本文整理汇总了Python中nltk.stem.WordNetLemmatizer.stem方法的典型用法代码示例。如果您正苦于以下问题:Python WordNetLemmatizer.stem方法的具体用法?Python WordNetLemmatizer.stem怎么用?Python WordNetLemmatizer.stem使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nltk.stem.WordNetLemmatizer 的用法示例。
在下文中一共展示了WordNetLemmatizer.stem方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: stem_cluster
# 需要导入模块: from nltk.stem import WordNetLemmatizer [as 别名]
# 或者: from nltk.stem.WordNetLemmatizer import stem [as 别名]
def stem_cluster(data, mode = 10, length_at_least = 3):
    """Cluster words by stem and return one representative word per cluster.

    Parameters
    ----------
    data : iterable of str
        Text fragments; each fragment is split on spaces into words.
    mode : int
        Minimum frequency a word must reach to be kept (default 10).
    length_at_least : int
        Minimum length for a word to be counted at all (default 3).

    Returns
    -------
    list of str
        For each stem cluster, the member word of least length.

    Raises
    ------
    Exception
        If the WordNet corpus cannot be imported or downloaded.
    """
    global stemmer
    # Lazily build and cache the default stemmer (NLTK's WordNet lemmatizer).
    # NOTE(review): assumes a module-level ``stemmer = None`` exists — confirm
    # against the full file; only this chunk is visible here.
    if stemmer is None:  # Py3-friendly identity test instead of ``== None``
        try:
            # Import succeeds when nltk (and its data) is already installed.
            from nltk.stem import WordNetLemmatizer
        except ImportError:  # narrowed from bare ``except:`` — import failure is the intent
            # Try to fetch the corpus, then import again.
            import nltk
            if not nltk.download('wordnet'):
                raise Exception('Error in downloading wordnet. '
                                'Please make sure you are connected to the network, '
                                'or try downloading manually.')
            from nltk.stem import WordNetLemmatizer
        # Cache the lemmatizer and expose lemmatize() under the stem() name
        # so it quacks like the stemmer API used below.
        stemmer = WordNetLemmatizer()
        stemmer.stem = stemmer.lemmatize
    from algoutils import flatten, split
    from collections import defaultdict
    # Split every fragment on spaces and flatten into a single word list.
    words = flatten(split(data, ' '))
    # Count occurrences of words that are long enough to matter.
    frequency = defaultdict(int)
    for word in words:
        if len(word) >= length_at_least:
            frequency[word] += 1
    # Keep only words seen at least ``mode`` times.
    # (Py3 fix: tuple-parameter lambdas (PEP 3113) and subscripting the
    # result of zip() were both removed in Python 3; a comprehension
    # expresses the same filter-and-project in one pass.)
    words = [word for word, freq in frequency.items() if freq >= mode]
    # Group the surviving words by their stem.
    stem_map = defaultdict(list)
    stem = stemmer.stem  # hoist the bound-method lookup out of the loop
    for word in words:
        stem_map[stem(word)].append(word)
    # Representative of each cluster = its shortest word.
    # (Py3 fix: return a real list, as Py2's map() did, not a lazy iterator.)
    return [min(cluster, key=len) for cluster in stem_map.values()]
示例2: stem_test
# 需要导入模块: from nltk.stem import WordNetLemmatizer [as 别名]
# 或者: from nltk.stem.WordNetLemmatizer import stem [as 别名]
def stem_test():
    """Smoke test printing lemmatizer vs. Porter-stemmer output.

    Prints the WordNet lemmas of "environment" and "environmental",
    then the Porter stem of "environmental", so the two normalizers
    can be compared by eye. Returns nothing.
    """
    # NOTE(review): WordNetLemmatizer and PorterStemmer are assumed to be
    # imported at module level — confirm against the full file.
    stemmer = WordNetLemmatizer()
    # Py3 fix: print statements converted to print() function calls.
    print(stemmer.lemmatize("environment"))
    print(stemmer.lemmatize("environmental"))
    stemmer = PorterStemmer()
    print(stemmer.stem("environmental"))