This article collects typical usage examples of the Python method nltk.stem.wordnet.WordNetLemmatizer. If you are wondering how to use wordnet.WordNetLemmatizer, how it is called in practice, or what real examples of it look like, the curated code samples below may help. You can also explore further usage examples for the containing module, nltk.stem.wordnet.
Below are 15 code examples of wordnet.WordNetLemmatizer, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
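Before diving into the examples, here is a minimal, stand-alone sketch (not taken from any of the projects below) of how WordNetLemmatizer is typically constructed and called. It assumes the WordNet corpus has been downloaded, e.g. via nltk.download('wordnet').

import nltk
from nltk.stem.wordnet import WordNetLemmatizer

nltk.download('wordnet', quiet=True)  # fetch the WordNet data once if it is not already installed

lemmatizer = WordNetLemmatizer()
print(lemmatizer.lemmatize('apples'))        # 'apple' (the default part of speech is noun)
print(lemmatizer.lemmatize('running', 'v'))  # 'run' (lemmatize as a verb)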
Example 1: is_atomic_mwe
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
    mwe_count = 0
    for synset in synsets:
        # NLTK 3 exposes definition(), lemmas() and name() as methods rather than attributes.
        gloss_lemmas = set([WordNetLemmatizer().lemmatize(word) for word in synset.definition().split()])
        if verb_lemma in gloss_lemmas or complement_lemma in gloss_lemmas:
            return False
        for syn_lemma in synset.lemmas():
            if syn_lemma.name() != mwe:
                tokens = syn_lemma.name().split('_')
                for token in tokens:
                    if token == verb_lemma:
                        return False
                if len(tokens) == 2 and tokens[1] == complement_lemma:
                    return False
            else:
                mwe_count += syn_lemma.count()
    return True
Example 2: ulterior_clean
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def ulterior_clean(tweets, filename):
    if not os.path.exists(filename):
        stopwords = get_stopwords_list()
        lemmatizer = WordNetLemmatizer()
        filtered_tweets = []
        for tw in tweets:
            filtered_tweet = []
            for t in tw.split():
                token = t.lower()
                if token in stopwords:
                    continue
                filtered_token = lemmatizer.lemmatize(token, 'v')
                filtered_token = lemmatizer.lemmatize(filtered_token)
                filtered_tweet.append(filtered_token)
            filtered_tweets.append(' '.join(filtered_tweet))
        utils.save_file(filtered_tweets, filename)
    # Load the filtered tokens
    filtered_tweets = utils.load_file(filename)
    return filtered_tweets
Example 3: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, save_dir, config_path='./miner.yaml'):
    super(ImageNetMiner, self).__init__(save_dir, config_path)
    self.__engine__ = 'imagenet'
    self.format_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'
    # maximum number of synsets to retrieve - we don't need all images necessarily,
    # otherwise we get enormous amounts of synsets for words like 'entity' or 'animal'
    self.max_synsets = 10000
    self.wnl = WordNetLemmatizer()
    # url cache
    self.imgnet_url_cache = {}
    # whether we "level up" in hierarchy if no images found
    self.level_up_if_no_images = True
Example 4: convert_to_vw
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def convert_to_vw(text):
    tokenizer = nltk.RegexpTokenizer(r'\w+')
    lmtzr = WordNetLemmatizer()
    tokens = [t.lower() for t in tokenizer.tokenize(text)]
    id_ = 13371337
    processed = []
    for t in tokens:
        l = lmtzr.lemmatize(t)
        processed.append(l)
    counted = Counter(processed)
    res_str = str(id_)
    for k, v in counted.items():
        if v != 1:
            res_str = res_str + " {}:{}".format(k, v)
        else:
            res_str = res_str + " {}".format(k)
    return res_str
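As a rough illustration of the format this function produces (hypothetical input, not part of the original repository), a short sentence is converted into a Vowpal Wabbit style feature line in which repeated lemmas carry an explicit count:

print(convert_to_vw("The cat sat on the mat."))
# Expected to print something like: 13371337 the:2 cat sat on mat
# (the feature order may vary with the Counter iteration order)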
Example 5: extract_lemmatized_nouns
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def extract_lemmatized_nouns(self, new_review):
    stopwords = self.load_stopwords()
    words = []
    sentences = nltk.sent_tokenize(new_review.lower())
    for sentence in sentences:
        tokens = nltk.word_tokenize(sentence)
        text = [word for word in tokens if word not in stopwords]
        tagged_text = nltk.pos_tag(text)
        for word, tag in tagged_text:
            words.append({"word": word, "pos": tag})
    lem = WordNetLemmatizer()
    nouns = []
    for word in words:
        if word["pos"] in ["NN", "NNS"]:
            nouns.append(lem.lemmatize(word["word"]))
    return nouns
Example 6: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, stoplist=None, punct=None, lemmatizer=None):
    # Load stopwords, punctuation, and lemmatizer
    # This takes a bit of work, so we only want to do it once!
    self.stopwords = stoplist or stopwords.words('english')
    self.punctuation = punct or string.punctuation
    self.lemmatizer = lemmatizer or WordNetLemmatizer()
Example 7: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self):
    self.lmtzr = WordNetLemmatizer()
    self.aux_verbs = ['be']
Example 8: getLemma
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def getLemma(text, contextFlag=False):
    lemmatizer = WordNetLemmatizer()
    # 'NN':wordnet.NOUN, 'JJ':wordnet.ADJ, 'VB':wordnet.VERB, 'RB':wordnet.ADV
    wordnet_tag = {'NN': 'n', 'JJ': 'a', 'VB': 'v', 'RB': 'r'}
    result = None
    if len(text.split()) == 1:  # one word
        tokenized = word_tokenize(text)
        tagged = pos_tag(tokenized)[0]
        lemma = ''
        try:
            lemma = lemmatizer.lemmatize(tagged[0], wordnet_tag[tagged[1][:2]])
        except KeyError:
            lemma = lemmatizer.lemmatize(tagged[0])
        result = lemma
    elif len(text.split()) > 1 and contextFlag == True:  # multiple words, lemmatized word by word without considering the context
        resultList = []
        for t in text.split():
            tokenized = word_tokenize(t)
            tagged = pos_tag(tokenized)[0]
            lemma = ''
            try:
                lemma = lemmatizer.lemmatize(tagged[0], wordnet_tag[tagged[1][:2]])
            except KeyError:
                lemma = lemmatizer.lemmatize(tagged[0])
            resultList.append(lemma)
        result = ' '.join(resultList)
    else:  # multiple words, i.e. a text, considering the context
        resultList = []
        tokens = word_tokenize(text)
        tagged = pos_tag(tokens)
        for t in tagged:
            try:
                resultList.append(lemmatizer.lemmatize(t[0], wordnet_tag[t[1][:2]]))
            except KeyError:
                resultList.append(lemmatizer.lemmatize(t[0]))
        result = ' '.join(resultList)
    return result
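A brief, hypothetical usage sketch of getLemma follows; it assumes the NLTK punkt, averaged_perceptron_tagger, and wordnet resources are installed. With the default contextFlag=False, the whole text is POS-tagged in context before lemmatization:

print(getLemma("The striped bats were hanging"))
# Expected to yield something like: 'The striped bat be hang'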
###################################################################################
# Given a Naive Bayes classifier, classify a text with a given certainty
Example 9: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self):
    self.stopwords = set(stopwords.words("english"))
    self.lemmatizer = WordNetLemmatizer()
Example 10: verbs2basicform
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def verbs2basicform(words):
    ret = []
    for w in words:
        analysis = wn.synsets(w)
        if any([a.pos() == 'v' for a in analysis]):
            w = WordNetLemmatizer().lemmatize(w, 'v')
        ret.append(w)
    return ret
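For illustration, a hypothetical call (assuming wn is bound to nltk.corpus.wordnet in the surrounding module): only tokens that have at least one verb synset are reduced to their base form, everything else passes through unchanged.

print(verbs2basicform(['went', 'apples']))
# Expected to print: ['go', 'apples']  ('went' has verb synsets, 'apples' does not)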
Example 11: parseSentence
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def parseSentence(line):
    lmtzr = WordNetLemmatizer()
    stop = stopwords.words('english')
    text_token = CountVectorizer().build_tokenizer()(line.lower())
    text_rmstop = [i for i in text_token if i not in stop]
    text_stem = [lmtzr.lemmatize(w) for w in text_rmstop]
    return text_stem
Example 12: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english'):
    self.stopwords = frozenset(nltk.corpus.stopwords.words(language))
    self.lemmatizer = WordNetLemmatizer()
Example 13: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english'):
    self.stopwords = set(nltk.corpus.stopwords.words(language))
    self.lemmatizer = WordNetLemmatizer()
Example 14: __init__
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english', minimum=2, maximum=200):
    self.min = minimum
    self.max = maximum
    self.stopwords = set(nltk.corpus.stopwords.words(language))
    self.lemmatizer = WordNetLemmatizer()
Example 15: unify_word
# Required import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def unify_word(word):  # went -> go, apples -> apple, BIG -> big
    """Unify verb tense and noun singular."""
    ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v'
    for wt in [ADJ, ADJ_SAT, ADV, NOUN, VERB]:
        try:
            word = WordNetLemmatizer().lemmatize(word, pos=wt)
        except:
            pass
    return word.lower()
Developer: WayneDW, Project: Sentiment-Analysis-in-Event-Driven-Stock-Price-Movement-Prediction, Lines of code: 11, Source file: util.py