当前位置: 首页>>代码示例>>Python>>正文


Python wordnet.morphy函数代码示例

本文整理汇总了Python中nltk.corpus.wordnet.morphy函数的典型用法代码示例。如果您正苦于以下问题:Python morphy函数的具体用法?Python morphy怎么用?Python morphy使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了morphy函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: lowest_common_hypernym

def lowest_common_hypernym(fr):
    """
    Feature: the lowest common WordNet hypernym of the two mentions.

    Only the last word of each mention (assumed to be the head word) is
    used, and only the first sense of each head. Pronouns and heads with
    no WordNet base form yield a False-valued feature.
    """
    try:
        head_i = wn.morphy(re.sub(r"\W", r"", fr.i_token.split('_')[-1]))
        head_j = wn.morphy(re.sub(r"\W", r"", fr.j_token.split('_')[-1]))

        # Bail out when either head is unusable (no base form, or a pronoun).
        if head_i is None or head_j is None:
            return "lowest_common_hypernym={}".format(False)
        if _is_pronoun(head_i) or _is_pronoun(head_j):
            return "lowest_common_hypernym={}".format(False)

        # First sense only, per the docstring.
        sense_i = wn.synsets(head_i)[0]
        sense_j = wn.synsets(head_j)[0]
        hypernym = sense_i.lowest_common_hypernyms(sense_j)[0]
        return "lowest_common_hypernym={}".format(hypernym)

    except wn_error:
        return "lowest_common_hypernym={}".format(False)
开发者ID:pkarmstr,项目名称:relation-extraction,代码行数:26,代码来源:feature_functions.py

示例2: get_similarity

	def get_similarity(self,word1,word2):
		'''Compute word similarity based on the WordNet semantic lexicon.

		Each word is first reduced to its WordNet base form (when one
		exists); the result is the maximum path similarity over all
		synset pairs, or 0 when no comparable pair exists.
		'''
		# Reduce each word to its WordNet base form when available.
		base1 = wn.morphy(word1.lower())
		if base1 is not None:
			word1 = base1
		base2 = wn.morphy(word2.lower())
		if base2 is not None:
			word2 = base2

		word1_synsets = wn.synsets(word1)
		word2_synsets = wn.synsets(word2)
		sim = 0

		for syn1 in word1_synsets:
			w1 = wn.synset(syn1.name())
			for syn2 in word2_synsets:
				w2 = wn.synset(syn2.name())
				tmp = w1.path_similarity(w2)
				# Bug fix: path_similarity returns None when the synsets
				# have no connecting path; guard before comparing so this
				# cannot raise TypeError (Python 3) or silently compare
				# None (Python 2).
				if tmp is not None and tmp > sim:
					sim = tmp
		return sim
开发者ID:cnspica,项目名称:ASExtractor,代码行数:28,代码来源:EnKeywordExtraction.py

示例3: subclass

def subclass(feats):
    """
    Feature: whether one mention's synset is an ancestor (common hypernym)
    of the other's in WordNet. Only computed when the string_match feature
    is False; otherwise the feature is False.
    """
    if string_match(feats).endswith("False"):
        try:
            result = False
            i_clean = wn.morphy(feats.i_cleaned.lower(), wn.NOUN)
            i_synsets = wn.synsets(i_clean)
            j_clean = wn.morphy(feats.j_cleaned.lower(), wn.NOUN)
            j_synsets = wn.synsets(j_clean)

            def get_common_hypernym(i_synset, j_synset):
                # Instance synsets (e.g. named entities) have no regular
                # hypernyms; fall back to their instance hypernyms.
                if len(i_synset.hypernyms()) == 0:
                    i_synset = i_synset.instance_hypernyms()[0]
                if len(j_synset.hypernyms()) == 0:
                    j_synset = j_synset.instance_hypernyms()[0]
                subc = i_synset.common_hypernyms(j_synset)
                return (i_synset in subc) or (j_synset in subc)

            for synset in i_synsets:
                for syn in j_synsets:
                    result = get_common_hypernym(synset, syn)
                    if result:
                        break
                if result:
                    break
            return "subclass={}".format(result)
        # Bug fix: the original had a bare `except:` followed by a stray
        # no-op statement `wn_error` (mangled `except wn_error:`). The
        # broad catch is kept deliberately: morphy may return None and the
        # lookups below then raise non-WordNet errors that the feature
        # should swallow.
        except Exception:
            return "subclass={}".format(False)

    else:
        return "subclass={}".format(False)
开发者ID:pkarmstr,项目名称:coreference-project,代码行数:30,代码来源:feature_functions.py

示例4: preprocessWords

def preprocessWords(lst):
  """Normalize a token list in place and return it.

  Applies special-case rewrites ('financial'->'finance', '_id'->'id'),
  keeps only words WordNet recognizes as a VERB or NOUN (replacing them
  with their base form or Porter stem where WordNet knows it), and drops
  single characters, stop words, and digits. Reserved words pass through
  untouched.
  """
  # Build the stemmer once instead of once (or twice) per word.
  stemmer = PorterStemmer()
  index = 0
  while index < len(lst):
    word = lst[index].lower()
    if word not in reservedWordList:
      # special handling carried over from the original Java code
      if word == 'financial':
        lst[index] = 'finance'
      # avoid '_id' appearing as a word in the description
      if word == '_id':
        lst[index] = 'id'
      # Only words with a VERB or NOUN reading are kept.
      if wn.morphy(word, wn.VERB) or wn.morphy(word, wn.NOUN):
        # morphy is deterministic; compute the base form once, not twice.
        base = wn.morphy(word)
        if base != word:
          lst[index] = base
          word = base
        else:
          stemmed = stemmer.stem_word(word)
          if wn.morphy(stemmed):
            lst[index] = stemmed
            word = stemmed
      else:
        del lst[index]
        continue
      if len(word) == 1 or word in stopWordList or word.isdigit():
        del lst[index]
        continue
    index += 1
  return lst
开发者ID:CollaborativeScientificWorkflow,项目名称:semantic-oesvm,代码行数:29,代码来源:preprocess.py

示例5: expand_queries

def expand_queries(file):
    '''
    For each term in a query, takes the first synset of the word from wordnet and adds all synonyms of that synset
    '''
    file = open(file)
    for sentence in file:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>')+1: sentence.rfind('<')]
            additions = ''
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
               if word not in stopwords.words('english'):
                   looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'                   
                   synsets = wn.synsets(word)
                   if looking_for in str(synsets):
                       new_words = (wn.synset(looking_for).lemma_names) #was .definition
                       for new_word in new_words:
                           if new_word.lower() != word.lower():
                               full_q = full_q +' '+ str(new_word)
                   else:
                       if wn.morphy(word) != None:
                           word = wn.morphy(word)
                           looking_for = str(word)+'.'+str(get_wordnet_pos(pos))+'.01'
                           print str(looking_for) + ' THIS IS WORD'
                           synsets = wn.synsets(word)
                           if looking_for in str(synsets):
                               new_words = (wn.synset(looking_for).lemma_names) #was .definition
                               for new_word in new_words:
                                   if new_word.lower() != word.lower():
                                       full_q = full_q +' '+ str(new_word)
            print query + ' '+ full_q
开发者ID:britth,项目名称:inls890-microblog,代码行数:33,代码来源:wordnetExpansion.py

示例6: same_hypernym

def same_hypernym(fr):
    """
    True if the two mentions share a WordNet hypernym.
    In multiword mentions, considering only the last word (I'm assuming last word=head).
    Not considering pronouns.
    Most of the logic was borrowed from Julia's WN function in the coref project - thank you.
    """
    try:
        head_i = wn.morphy(re.sub(r"\W", r"", fr.i_token.split('_')[-1]))
        head_j = wn.morphy(re.sub(r"\W", r"", fr.j_token.split('_')[-1]))

        unusable = (
            head_i is None or head_j is None
            or _is_pronoun(head_i) or _is_pronoun(head_j)
        )
        if unusable:
            return "same_hypernym={}".format(False)

        # Precompute the j-side hypernym sets once; intersect against each
        # i-side set until any overlap is found.
        hypernyms_j = [set(s.hypernyms()) for s in wn.synsets(head_j)]
        for synset_i in wn.synsets(head_i):
            hyp_i = set(synset_i.hypernyms())
            if any(hyp_i & hyp_j for hyp_j in hypernyms_j):
                return "same_hypernym={}".format(True)

        return "same_hypernym={}".format(False)

    except wn_error:
        return "same_hypernym={}".format(False)
开发者ID:pkarmstr,项目名称:relation-extraction,代码行数:33,代码来源:feature_functions.py

示例7: _wnbase

 def _wnbase(self):
     """Return the WordNet base form of self.lemma for this POS tag, or None."""
     pos_by_tag = {'n': wn.NOUN, 'v': wn.VERB, 'a': wn.ADJ}
     pos = pos_by_tag.get(self.postag)
     if pos is None:
         return None
     return wn.morphy(self.lemma, pos)
开发者ID:aurora1625,项目名称:Recognizing-Textual-Entailment,代码行数:8,代码来源:part4.py

示例8: ApplyBNB

def ApplyBNB(doc_tokens, classes_postings, condprob, prior, vocabulary, selected_features):
    """Score a document under a Bernoulli Naive Bayes model.

    For each class, extracts the document's feature set (lowercased tokens
    minus negated words, stop words, and names, normalized by process_word
    and filtered to the vocabulary / selected features), then sums the
    log-probabilities of each vocabulary term being present or absent.

    Returns (scores, diff) where diff is |scores["0"] - scores["1"]|.
    """
    ## Assumes global dictionaries defined: stop_words, names, negation_words
    global stop_words, names, negation_words
    scores = dict()
    for c in classes_postings:
        scores[c] = 0  # math.log(prior[c])

        negation_found = False
        doc_features = []
        for t in doc_tokens:
            t = t.lower()

            if constants.LA and t in negation_words:
                negation_found = True
                continue

            if t in stop_words:
                continue

            if t in names:
                continue

            # NOTE(review): the original computed wn.morphy(t, ...) for
            # ADJ/NOUN/VERB/ADV here (and adverb_found/adverb_condprob
            # above) but never used the results; the dead lookups and
            # variables have been removed.

            if constants.LA and negation_found:
                negation_found = False
                continue

            t = process_word(t)

            if t not in vocabulary:
                continue
            if constants.FEATURE_SELECTION is not None and t not in selected_features[c]:
                continue

            doc_features.append(t)

        # NOTE(review): `vocab` is computed but the scoring loop below
        # iterates the full `vocabulary` — likely a latent bug; behavior
        # is kept as-is. TODO confirm with the author.
        vocab = vocabulary
        if constants.FEATURE_SELECTION is not None:
            vocab = selected_features[c]

        # Set membership is O(1) versus O(n) on the list.
        doc_feature_set = set(doc_features)
        for t in vocabulary:
            if t in doc_feature_set:
                scores[c] += math.log(condprob[t][c])
            else:
                scores[c] += math.log(1.0 - condprob[t][c])

    diff = math.fabs(scores["0"] - scores["1"])

    return (scores, diff)
开发者ID:aimannajjar,项目名称:columbiau-set-movie-review-classifier,代码行数:55,代码来源:algorithm.py

示例9: getRoot

def getRoot(w, tag=False):
    """Return the WordNet base form of word `w`.

    With no tag, try verb/noun/adjective/adverb (in that order) and return
    the first base form found, else `w` itself. With a tag, look up just
    that tag, falling back to `w` on any lookup error.
    """
    if tag is False:
        # Bug fix: the loop variable used to shadow the `tag` parameter.
        for candidate in ['v', 'n', 'a', 'r']:
            root = wordnet.morphy(w, tagequiv(candidate))
            if root:
                return root
        return w
    try:
        return wordnet.morphy(w, tag)
    except Exception:  # was a bare except; keep the fall-back-to-word behavior
        return w
开发者ID:AllanRamsay,项目名称:COMP34411,代码行数:11,代码来源:te.py

示例10: get_synonyms_as_set

def get_synonyms_as_set(input_word):
    """Return the set of WordNet synonyms of `input_word` (base forms only).

    Lemma names with no WordNet base form are skipped; results are
    ASCII-sanitized strings. Returns an empty set for None input.
    """
    if input_word is None:
        return set()

    synonyms = set()
    synSets = wn.synsets(input_word)
    for syn in synSets:
        for lemma_name in syn.lemma_names():
            # Look up the base form once instead of twice per lemma.
            base = wn.morphy(lemma_name)
            if base is not None:
                synonyms.add(str(base.encode('utf-8').decode('ascii', 'ignore')))
    return synonyms
开发者ID:azariaa,项目名称:ENC,代码行数:11,代码来源:helperFunctions.py

示例11: getGroup

def getGroup(count, word, threshold, wordsSeen, groups):
    """Assign `word` (punctuation stripped) to a similarity group.

    Reuses a cached assignment from `wordsSeen` when available; otherwise
    compares the word's first synset against every existing group in
    `count` via nSim and joins the most similar group at or above
    `threshold` (falling back to a singleton group). Mutates `wordsSeen`
    and `groups` in place and returns the chosen group name.
    """
    word = "".join(l for l in word if l not in string.punctuation)

    # Already grouped: reuse the cached assignment.
    if wordsSeen.has_key(word):
        return wordsSeen.get(word)

    def _register(member, group_name):
        # Record the word->group mapping and add the word to the group's
        # member set (creating the set on first use). Replaces the three
        # duplicated update blocks of the original.
        wordsSeen.update({member: group_name})
        if groups.has_key(group_name):
            groups.get(group_name).update([member])
        else:
            members = set()
            members.update([member])
            groups.update({group_name: members})

    # Pick a synset-like handle for similarity comparison.
    if wn.synsets(word):
        wordSyn = wn.synsets(word)[0]
    elif wn.morphy(word):
        # NOTE(review): this indexes the *string* returned by morphy,
        # yielding its first character — probably meant
        # wn.synsets(wn.morphy(word))[0]. Behavior preserved pending
        # confirmation.
        wordSyn = wn.morphy(word)[0]
    else:
        # No WordNet entry at all: the word becomes its own group.
        _register(word, word)
        return word

    best = 0
    group = word
    # Compare against each existing group; keep the best match above threshold.
    for super_word in count.keys():
        comparisons = groups.get(super_word)
        sim = nSim(wordSyn, comparisons)
        if sim >= threshold and sim > best:
            group = super_word
            best = sim

    _register(word, group)
    return group
开发者ID:EmilyVanHorn,项目名称:Good-Turing,代码行数:52,代码来源:GoodTuringEdit.py

示例12: chunktaged

def chunktaged(tokens, tagged, word):
    '''
    Extract the meaningful chunk (phrase) from the sentence.
    Also can be imagined as a phrase detection.

    PARAMETER LIST:
    tokens is a list of the words in the sentence:
    ['I', 'previously', 'booked', 'the', 'nice', 'flight', '.']
    tagged is a list of tuples consisting of word and POS:
    [('I', 'PRP'), ('previously', 'RB'), ('booked', 'VBD'), ('the', 'DT'), ('nice', 'JJ'), ('flight', 'NN'), ('.', '.')]
    word is what we look up for:
    'booked'

    The return value should be a phrase like 'turn_on' or just the origin word.

    # the rules as our knowledge:
    # 1, consecutive nouns
    # 2, verb before a preposition
    '''

    word_index = tokens.index(word)

    if (pos_map.has_key(tagged[word_index][1])):
        word_pos = pos_map[tagged[word_index][1]]
    else:
        return word

    # Normalize to the WordNet base form; look it up once, not twice.
    if word_pos == 'VERB':
        base = wn.morphy(word, wn.VERB)
        if base is not None:
            word = base
    elif word_pos == 'NOUN':
        base = wn.morphy(word, wn.NOUN)
        if base is not None:
            word = base

    # Last token: nothing to chunk with.
    if word_index == len(tokens) - 1:
        return word

    if (pos_map.has_key(tagged[word_index + 1][1])):
        next_word_pos = pos_map[tagged[word_index + 1][1]]
    else:
        return word

    if (word_pos == 'VERB' and next_word_pos == 'PP') or \
       (word_pos == 'NOUN' and next_word_pos == 'NOUN'):
        possible_chunk = word + '_' + tokens[word_index+1]
        # in case the consecutive Noun is not a phrase
        if wn.synsets(possible_chunk) == []:
            return word
        else:
            return possible_chunk
    else:
        return word
开发者ID:ouyangz,项目名称:SchoolProjects,代码行数:50,代码来源:chunk_ver2.py

示例13: get_roots

    def get_roots(sentence):
        """Map each clean token to its WordNet base form, keeping the token itself when none exists."""
        roots = []
        for idx, token in enumerate(sentence.clean_tokens):
            if sentence.tokens_pos[idx] == "VB":
                base = wn.morphy(token, wn.VERB)
            else:
                base = wn.morphy(token)
            roots.append(base if base is not None else token)
        return roots
开发者ID:djrenren,项目名称:nlpQ-A,代码行数:14,代码来源:davila_features.py

示例14: main

def main():
    """Score each line of a pun file against its homophone and POS candidates,
    printing the highest-similarity match per extracted word."""
    punIn = raw_input("Pun File: ")  # get it it's a pun on "punning" hah hah
    # Bug fix: the file handle was never closed; `with` releases it.
    with open(punIn, "r") as f:
        for line in f:
            posList = POSCheck(line)  # words that stood out in the POS tagging
            hList = homophoneCheck(line)  # homophones paired with the original word
            print (posList)
            print (hList)
            extText = POSextract(line)  # list of the important words extracted
            print (extText)

            def _best(candidates, query, hiscore, highSim):
                # Track the highest-scoring conceptCheck result over
                # `candidates`; replaces the four duplicated loops of the
                # original.
                for cand in candidates:
                    sim = conceptCheck(query, cand)
                    if sim == []:
                        continue
                    if sim[2] > hiscore:
                        highSim = sim
                        hiscore = highSim[2]
                return hiscore, highSim

            for word in extText:
                hiscore = 0
                highSim = []
                # Hoisted: the base form is invariant per word, so compute
                # it once instead of once per candidate.
                mword = wn.morphy(word)

                hiscore, highSim = _best(hList, word, hiscore, highSim)
                if mword:
                    hiscore, highSim = _best(hList, mword, hiscore, highSim)
                hiscore, highSim = _best(posList, word, hiscore, highSim)
                if mword:
                    hiscore, highSim = _best(posList, mword, hiscore, highSim)
                print (highSim)
开发者ID:jxjzhang,项目名称:Punnography,代码行数:50,代码来源:punRecog.py

示例15: get_antonyms_as_set

def get_antonyms_as_set(input_word):
    """Return the set of WordNet antonyms of `input_word` (base forms only).

    Antonym names with no WordNet base form are skipped; results are
    ASCII-sanitized strings. Returns an empty set for None input.
    """
    if input_word is None:
        return set()

    antonyms = set()
    synonyms = wn.synsets(input_word)

    for syn in synonyms:
        lemmas = syn.lemmas()

        for lem in lemmas:
            for ant in lem.antonyms():
                # Look up the base form once instead of twice per antonym.
                base = wn.morphy(ant.name())
                if base is not None:
                    antonyms.add(str(base.encode('utf-8').decode('ascii', 'ignore')))
    return antonyms
开发者ID:azariaa,项目名称:ENC,代码行数:15,代码来源:helperFunctions.py


注:本文中的nltk.corpus.wordnet.morphy函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。