本文整理汇总了Python中nltk.corpus.wordnet.morphy函数的典型用法代码示例。如果您正苦于以下问题:Python morphy函数的具体用法?Python morphy怎么用?Python morphy使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了morphy函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: lowest_common_hypernym
def lowest_common_hypernym(fr):
    """
    Feature: the lowest common hypernym (WordNet) of the two mentions.

    The last '_'-separated word of each mention is treated as its head and
    lemmatized with morphy; only the first sense of each head is considered.
    Returns "lowest_common_hypernym=<synset>" on success, or
    "lowest_common_hypernym=False" when either head is missing, is a
    pronoun, or the WordNet lookup fails.
    """
    def head_lemma(token):
        # Strip non-word characters from the last piece, then lemmatize.
        return wn.morphy(re.sub(r"\W", r"", token.split('_')[-1]))

    try:
        head_i = head_lemma(fr.i_token)
        head_j = head_lemma(fr.j_token)
        if head_i is None or head_j is None:
            return "lowest_common_hypernym={}".format(False)
        if _is_pronoun(head_i) or _is_pronoun(head_j):
            return "lowest_common_hypernym={}".format(False)
        first_sense_i = wn.synsets(head_i)[0]
        first_sense_j = wn.synsets(head_j)[0]
        common = first_sense_i.lowest_common_hypernyms(first_sense_j)[0]
        return "lowest_common_hypernym={}".format(common)
    except wn_error:
        return "lowest_common_hypernym={}".format(False)
示例2: get_similarity
def get_similarity(self, word1, word2):
    '''Compute word similarity from the WordNet lexical database.

    Both words are lowercased and reduced to their WordNet base form when
    morphy knows one; the result is the best path similarity found over
    every pair of senses (0 when no pair is related).
    '''
    # Lemmatize each word when WordNet can find a base form for it.
    base1 = wn.morphy(word1.lower())
    if base1 != None:
        word1 = base1
    base2 = wn.morphy(word2.lower())
    if base2 != None:
        word2 = base2
    best = 0
    # Exhaustively compare every sense of word1 with every sense of word2.
    for sense1 in wn.synsets(word1):
        left = wn.synset(sense1.name())
        for sense2 in wn.synsets(word2):
            right = wn.synset(sense2.name())
            score = left.path_similarity(right)
            if score > best:
                best = score
    return best
示例3: subclass
def subclass(feats):
    """
    Feature: whether one mention's (first-noun-sense) synset is a hypernym
    of the other's, i.e. one is a subclass of the other in WordNet.

    Only evaluated when string_match() reported False; any WordNet failure
    (unknown word, missing instance hypernym, ...) yields "subclass=False".
    """
    if string_match(feats).endswith("False"):
        try:
            result = False
            i_clean = wn.morphy(feats.i_cleaned.lower(), wn.NOUN)
            i_synsets = wn.synsets(i_clean)
            j_clean = wn.morphy(feats.j_cleaned.lower(), wn.NOUN)
            j_synsets = wn.synsets(j_clean)
            def get_common_hypernym(i_synset, j_synset):
                # Synsets with no regular hypernyms (e.g. named entities)
                # fall back to their first instance hypernym.
                i_hypernyms = i_synset.hypernyms()
                j_hypernyms = j_synset.hypernyms()
                if len(i_hypernyms) == 0:
                    i_synset = i_synset.instance_hypernyms()[0]
                if len(j_hypernyms) == 0:
                    j_synset = j_synset.instance_hypernyms()[0]
                # True when one synset sits on the other's hypernym path.
                subc = i_synset.common_hypernyms(j_synset)
                return (i_synset in subc) or (j_synset in subc)
            for synset in i_synsets:
                for syn in j_synsets:
                    result = get_common_hypernym(synset, syn)
                    if result: break
                if result: break
            return "subclass={}".format(result)
        # BUG FIX: the original had a bare "except:" followed by a dead
        # expression statement "wn_error"; catch explicitly instead.
        except Exception:
            return "subclass={}".format(False)
    else:
        return "subclass={}".format(False)
示例4: preprocessWords
def preprocessWords(lst):
    """
    Normalize a token list in place and return it.

    For each non-reserved token: apply two special-case rewrites
    ('financial' -> 'finance', '_id' -> 'id'), keep it only if WordNet
    recognizes it as a VERB or NOUN (replacing it with its morphy base
    form), otherwise try the Porter-stemmed form; drop everything else.
    Finally, single characters, stop words and pure digits are removed.
    """
    stemmer = PorterStemmer()  # construct once, not per token
    index = 0
    while index < len(lst):
        word = lst[index].lower()
        if word not in reservedWordList:
            # special handling carried over from the original Java code
            if word == 'financial':
                lst[index] = 'finance'
            # avoid '_id' appearing as a word in the description
            if word == '_id':
                lst[index] = 'id'
            # only VERB and NOUN readings are kept
            if wn.morphy(word, wn.VERB) or wn.morphy(word, wn.NOUN):
                base = wn.morphy(word)  # hoisted: was called up to 3 times
                if base != word:
                    lst[index] = base
                    word = lst[index]
            else:
                stemmed = stemmer.stem_word(word)  # hoisted: was called twice
                if wn.morphy(stemmed):
                    lst[index] = stemmed
                    word = lst[index]
                else:
                    del lst[index]
                    continue
        # NOTE(review): this filter is assumed to apply to every surviving
        # token, reserved or not -- confirm against the original layout.
        if len(word) == 1 or word in stopWordList or word.isdigit():
            del lst[index]
            continue
        index += 1
    return lst
示例5: expand_queries
def expand_queries(file):
'''
For each term in a query, takes the first synset of the word from wordnet and adds all synonyms of that synset
'''
file = open(file)
for sentence in file:
sentence = sentence.strip()
if sentence.find('<text>') != -1:
query = sentence[sentence.find('>')+1: sentence.rfind('<')]
additions = ''
updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
full_q = query
for word, pos in updated_q:
if word not in stopwords.words('english'):
looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'
synsets = wn.synsets(word)
if looking_for in str(synsets):
new_words = (wn.synset(looking_for).lemma_names) #was .definition
for new_word in new_words:
if new_word.lower() != word.lower():
full_q = full_q +' '+ str(new_word)
else:
if wn.morphy(word) != None:
word = wn.morphy(word)
looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'
print str(looking_for) + ' THIS IS WORD'
synsets = wn.synsets(word)
if looking_for in str(synsets):
new_words = (wn.synset(looking_for).lemma_names) #was .definition
for new_word in new_words:
if new_word.lower() != word.lower():
full_q = full_q +' '+ str(new_word)
print query + ' '+ full_q
示例6: same_hypernym
def same_hypernym(fr):
    """
    Feature: True when the two mentions share a direct hypernym in WordNet.

    In multiword mentions only the last word is considered (assumed to be
    the head); pronouns are excluded. Most of the logic was borrowed from
    Julia's WN function in the coref project - thank you.
    """
    try:
        head_i = wn.morphy(re.sub(r"\W", r"", fr.i_token.split('_')[-1]))
        head_j = wn.morphy(re.sub(r"\W", r"", fr.j_token.split('_')[-1]))
        if head_i is None or head_j is None:
            return "same_hypernym={}".format(False)
        if _is_pronoun(head_i) or _is_pronoun(head_j):
            return "same_hypernym={}".format(False)
        # Pool the direct hypernyms of every j-sense once, then test each
        # i-sense against the pool (same boolean as the pairwise version).
        j_hypernyms = set()
        for sense in wn.synsets(head_j):
            j_hypernyms.update(sense.hypernyms())
        for sense in wn.synsets(head_i):
            if j_hypernyms.intersection(sense.hypernyms()):
                return "same_hypernym={}".format(True)
        return "same_hypernym={}".format(False)
    except wn_error:
        return "same_hypernym={}".format(False)
示例7: _wnbase
def _wnbase(self):
    """Return the WordNet base form of self.lemma for this POS tag, or None
    when the tag is not one of 'n'/'v'/'a'."""
    wn_pos_by_tag = {'n': wn.NOUN, 'v': wn.VERB, 'a': wn.ADJ}
    wn_pos = wn_pos_by_tag.get(self.postag)
    if wn_pos is None:
        return None
    return wn.morphy(self.lemma, wn_pos)
示例8: ApplyBNB
def ApplyBNB(doc_tokens, classes_postings, condprob, prior, vocabulary, selected_features):
    """
    Score a document under a Bernoulli Naive Bayes model.

    For each class, tokens are filtered (negation handling, stop words,
    names, vocabulary / per-class feature selection) into the document's
    feature set, then every vocabulary term contributes log P(t|c) if
    present or log(1 - P(t|c)) if absent.

    Returns (scores, diff): scores maps class label -> log-score and diff
    is the absolute gap between the scores of classes "0" and "1".
    """
    ## Assumes global dictionaries defined: stop_words, names, negation_words
    global stop_words, names, negation_words
    scores = dict()
    for c in classes_postings:
        scores[c] = 0  # math.log(prior[c])
        negation_found = False
        # set for O(1) membership in the scoring loop below
        doc_features = set()
        for t in doc_tokens:
            t = t.lower()
            if constants.LA and t in negation_words:
                negation_found = True
                continue
            if t in stop_words:
                continue
            if t in names:
                continue
            # NOTE(review): the original computed wn.morphy(t, ADJ/NOUN/
            # VERB/ADV) flags here and never used them -- four WordNet
            # lookups of dead work per token; removed.
            if constants.LA and negation_found:
                # skip the token immediately following a negation word
                negation_found = False
                continue
            t = process_word(t)
            if t not in vocabulary:
                continue
            if constants.FEATURE_SELECTION is not None and t not in selected_features[c]:
                continue
            doc_features.add(t)
        vocab = vocabulary
        if constants.FEATURE_SELECTION is not None:
            vocab = selected_features[c]
        # BUG FIX: the original iterated 'vocabulary' here, ignoring the
        # 'vocab' narrowing it just computed; iterate the selected vocab.
        for t in vocab:
            if t in doc_features:
                scores[c] += math.log(condprob[t][c])
            else:
                scores[c] += math.log(1.0 - condprob[t][c])
    diff = math.fabs(scores["0"] - scores["1"])
    return (scores, diff)
示例9: getRoot
def getRoot(w, tag=False):
    """Look up the WordNet root (base form) of w.

    With no tag, tries verb/noun/adjective/adverb in turn and returns the
    first hit; with a tag, looks up that POS directly. Falls back to w
    itself when nothing is found or the lookup fails.
    """
    if tag == False:
        # original shadowed the 'tag' parameter with the loop variable
        for pos_guess in ['v', 'n', 'a', 'r']:
            root = wordnet.morphy(w, tagequiv(pos_guess))
            if root:
                return root
        return w
    try:
        return wordnet.morphy(w, tag)
    except:
        # any lookup failure: keep the surface form
        return w
示例10: get_synonyms_as_set
def get_synonyms_as_set(input_word):
    """
    Return the set of WordNet synonyms of input_word: every lemma name of
    every synset, lemmatized with morphy and reduced to its ASCII form.
    Returns an empty set for None input or when nothing is found.
    """
    if input_word is None:
        return set()
    synonyms = set()
    synSets = wn.synsets(input_word)
    for syn in synSets:
        for lemma_name in syn.lemma_names():
            base = wn.morphy(lemma_name)  # hoisted: was computed twice
            if base is not None:
                synonyms.add(str(base.encode('utf-8').decode('ascii', 'ignore')))
    return synonyms
示例11: getGroup
def getGroup(count, word, threshold, wordsSeen, groups):
    """
    Assign `word` to a semantic group and return the group's key.

    count: dict whose keys are the candidate group head-words.
    word: the token to classify (punctuation is stripped first).
    threshold: minimum nSim similarity required to join an existing group.
    wordsSeen: cache mapping word -> group (mutated in place).
    groups: dict mapping group key -> set of member words (mutated in place).

    If the word has no WordNet entry at all it founds its own group.
    NOTE: Python 2 code (dict.has_key).
    """
    word = "".join(l for l in word if l not in string.punctuation)
    best = 0
    group = word
    #searchForExisting
    if(wordsSeen.has_key(word)):
        return wordsSeen.get(word)
    #get synset of word
    if(wn.synsets(word)):
        wordSyn = wn.synsets(word)[0]
    elif(wn.morphy(word)):
        # NOTE(review): wn.morphy returns a string, so [0] yields its FIRST
        # CHARACTER, not a synset -- wn.synsets(wn.morphy(word))[0] was
        # probably intended; confirm before relying on this branch.
        wordSyn = wn.morphy(word)[0]
    else:
        #no synset; use word
        wordsSeen.update({word: group})
        # register the word as the sole member of its own (new) group
        if(groups.has_key(group)):
            newValue = groups.get(group)
            newValue.update([word])
            groups.update({group: newValue})
        else:
            newValue = set()
            newValue.update([word])
            groups.update({group: newValue})
        wordsSeen.update({word: group})
        return word
    #compare to each group
    # is there a way to compare one word to many words?
    for super_word in count.keys():
        #get synset of group being tested against
        comparisons = groups.get(super_word)
        sim = nSim(wordSyn, comparisons)
        # keep the most similar group that clears the threshold
        if(sim >= threshold and sim > best):
            group = super_word
            best = sim
    # record the final assignment and add the word to the group's member set
    wordsSeen.update({word: group})
    if(groups.has_key(group)):
        newValue = groups.get(group)
        newValue.update([word])
        groups.update({group: newValue})
    else:
        newValue = set()
        newValue.update([word])
        groups.update({group: newValue})
    wordsSeen.update({word: group})
    return group
示例12: chunktaged
def chunktaged(tokens, tagged, word):
    '''
    Extract the meaningful chunk (phrase) containing `word`, i.e. a simple
    phrase detector.

    tokens: the sentence's words, e.g.
        ['I', 'previously', 'booked', 'the', 'nice', 'flight', '.']
    tagged: (word, POS) pairs for the same sentence, e.g.
        [('I', 'PRP'), ('previously', 'RB'), ('booked', 'VBD'), ...]
    word: the word to look up, e.g. 'booked'

    Returns a joined phrase like 'turn_on' when the knowledge rules fire
    (verb followed by a preposition, or two consecutive nouns) and WordNet
    knows the joined form; otherwise returns the (lemmatized) word itself.
    '''
    idx = tokens.index(word)
    tag = tagged[idx][1]
    if tag not in pos_map:
        return word
    word_pos = pos_map[tag]
    # Lemmatize according to the coarse POS when WordNet can.
    if word_pos == 'VERB':
        lemma = wn.morphy(word, wn.VERB)
        if lemma != None:
            word = lemma
    elif word_pos == 'NOUN':
        lemma = wn.morphy(word, wn.NOUN)
        if lemma != None:
            word = lemma
    # Last token: nothing to join with.
    if idx == len(tokens) - 1:
        return word
    next_tag = tagged[idx + 1][1]
    if next_tag not in pos_map:
        return word
    next_pos = pos_map[next_tag]
    # Rules as our knowledge: verb+preposition, or consecutive nouns.
    verb_then_prep = (word_pos == 'VERB' and next_pos == 'PP')
    noun_then_noun = (word_pos == 'NOUN' and next_pos == 'NOUN')
    if not (verb_then_prep or noun_then_noun):
        return word
    candidate = word + '_' + tokens[idx + 1]
    # in case the consecutive pair is not actually a known phrase
    if wn.synsets(candidate) == []:
        return word
    return candidate
示例13: get_roots
def get_roots(sentence):
    """Return the WordNet base form of each clean token in the sentence,
    falling back to the token itself when morphy finds nothing."""
    roots = []
    for position, tok in enumerate(sentence.clean_tokens):
        # Verbs get a POS-constrained lookup; everything else a generic one.
        if sentence.tokens_pos[position] == "VB":
            base = wn.morphy(tok, wn.VERB)
        else:
            base = wn.morphy(tok)
        roots.append(tok if base is not None else base)
        if roots[-1] is None:
            roots[-1] = tok
    return roots
示例14: main
def main():
    """
    Interactive pun scorer: reads a pun file and, per line, finds the
    concept pair with the highest similarity score.

    For every line: POSCheck yields words that stood out in POS tagging,
    homophoneCheck yields (homophone, original-word) entries, POSextract
    yields the important words. Each extracted word (and its morphy base
    form, when one exists) is compared via conceptCheck against every
    homophone and every POS-flagged word; the best-scoring result
    (element [2] of conceptCheck's return) is printed per line.
    NOTE: Python 2 code (raw_input, print statements).
    NOTE(review): the loop nesting below was reconstructed from flattened
    source -- the 'for a'/'for b' morphy passes are assumed to be siblings
    of the 'for i'/'for j' passes; confirm against the original.
    """
    punIn = raw_input("Pun File: ") # get it it's a pun on "punning" hah hah
    f = open(punIn, "r")
    for line in f:
        posList = POSCheck(line) # returns a list of words that stood out in the POS tagging
        hList = homophoneCheck(line) # returns a list of homophones, along with the original word from the sentence
        print (posList)
        print (hList)
        extText = POSextract(line) # returns a list with all of the important words extracted
        print (extText)
        hiscore = 0
        highSim = []
        for word in extText:
            # pass 1: the surface word against every homophone
            for i in range(0, len(hList)):
                hSim = conceptCheck(word, hList[i])
                if hSim == []:
                    continue
                elif hSim[2] > hiscore:
                    highSim = hSim
                    hiscore = highSim[2]
            # pass 2: the morphy base form against every homophone;
            # break immediately if the word has no base form
            for a in range(0, len(hList)):
                mword = wn.morphy(word)
                if mword:
                    hMorphSim = conceptCheck(mword, hList[a])
                    if hMorphSim == []:
                        continue
                    elif hMorphSim[2] > hiscore:
                        highSim = hMorphSim
                        hiscore = highSim[2]
                else:
                    break
            # pass 3: the surface word against every POS-flagged word
            for j in range(0, len(posList)):
                pSim = conceptCheck(word, posList[j])
                if pSim == []:
                    continue
                elif pSim[2] > hiscore:
                    highSim = pSim
                    hiscore = highSim[2]
            # pass 4: the morphy base form against every POS-flagged word
            for b in range(0, len(posList)):
                mword = wn.morphy(word)
                if mword:
                    pMorphSim = conceptCheck(mword, posList[b])
                    if pMorphSim == []:
                        continue
                    elif pMorphSim[2] > hiscore:
                        highSim = pMorphSim
                        hiscore = highSim[2]
                else:
                    break
        print (highSim)
示例15: get_antonyms_as_set
def get_antonyms_as_set(input_word):
    """
    Return the set of WordNet antonyms of input_word: every antonym lemma
    across all senses, lemmatized with morphy and reduced to its ASCII
    form. Returns an empty set for None input or when nothing is found.
    """
    if input_word is None:
        return set()
    antonyms = set()
    synonyms = wn.synsets(input_word)
    for syn in synonyms:
        lemmas = syn.lemmas()
        for lem in lemmas:
            for ant in lem.antonyms():
                base = wn.morphy(ant.name())  # hoisted: was computed twice
                if base is not None:
                    antonyms.add(str(base.encode('utf-8').decode('ascii', 'ignore')))
    return antonyms