
Python wordnet.synsets Function Code Examples

This article collects and summarizes typical usage examples of the Python nltk.corpus.wordnet.synsets function. If you are wondering what exactly synsets does, how to call it, or how it is used in practice, the hand-picked code examples below should help.


Fifteen synsets code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
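
Before the examples, here is a minimal sketch of the basic call (it assumes the WordNet corpus has already been fetched with nltk.download('wordnet')):

from nltk.corpus import wordnet as wn

# Each Synset object represents one sense of the word.
for synset in wn.synsets('dog'):
    print(synset.name(), '-', synset.definition())

# An optional pos argument restricts the lookup to one part of speech.
print(wn.synsets('dog', pos=wn.VERB))  # e.g. [Synset('chase.v.01')]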

Example 1: findSimilarity

    # wn is nltk.corpus.wordnet, imported elsewhere in the original module
    def findSimilarity(self):
        '''The first entry of recoItems is the recommended item; the second
        entry is the list of items it was preferred over. We take the exact
        synsets of the first item and up to 10 synsets (to keep computation
        cheap) of each item in the second list.'''

        recommendation = wn.synsets(self.recoItems[0])
        recommendationFiltered = []
        for eachSyn in recommendation:
            if self.recoItems[0] in str(eachSyn):
                recommendationFiltered.append(eachSyn)

        choices = {}
        for eachItem in self.recoItems[1]:
            choices[eachItem] = wn.synsets(eachItem)[:10]  # only the first 10 synsets

        choiceScores = {}
        for key, value in choices.items():  # iteritems() in the original Python 2 code
            choiceScores[key] = []
            for eachValue in value:
                for eachRecoSyn in recommendationFiltered:
                    similarity = eachRecoSyn.path_similarity(eachValue)
                    if similarity is not None:  # path_similarity returns None when no path exists
                        choiceScores[key].append(similarity)

        maxChoiceScores = {}
        for eachKey in choiceScores.keys():
            # default=0 guards against items for which no score could be computed
            maxChoiceScores[eachKey] = max(choiceScores[eachKey], default=0)

        return maxChoiceScores
Author: malughanshyam, Project: B552_Knowledge_Based_AI_Project, Lines: 29, Source: SimilarityFinder.py
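
A self-contained sketch of the same idea as a plain function (the names here are illustrative, not from the original project, whose SimilarityFinder class is not shown in the snippet):

from nltk.corpus import wordnet as wn

def max_path_similarity(item, alternatives, limit=10):
    # Keep only synsets whose name contains the item string, mirroring
    # the filtering step in findSimilarity above.
    item_synsets = [s for s in wn.synsets(item) if item in str(s)]
    scores = {}
    for alt in alternatives:
        sims = [r.path_similarity(a)
                for a in wn.synsets(alt)[:limit]
                for r in item_synsets]
        scores[alt] = max((s for s in sims if s is not None), default=0)
    return scores

# Maps each alternative to its best path similarity with 'dog'.
print(max_path_similarity('dog', ['cat', 'car', 'wolf']))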

Example 2: parseLyrics2

import copy
import re

from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer

def parseLyrics2(outlist):
    bandLyricInfo = {}
    master = [['death', 0],['violence',0],['sacrifice',0],['nature',0],['peace',0],['storm',0],['spirit',0],['dark',0],['scream',0],['pain',0],['blood',0],['flesh',0],['love',0],['greed',0],['poison',0],['anger',0],['revenge',0],['misery',0],['hell',0],['heaven',0],['hate',0],['soul',0],['battle',0],['ghost',0],['joy',0],['light',0],['omen',0],['miracle',0],['magic',0],['universe',0],['disease',0],['god',0],['satan',0],['struggle',0],['heart',0]]
    for key in outlist:
        templist = copy.deepcopy(master)
        raw = outlist[key].lower()
        # re.LOCALE is not valid for str patterns in Python 3; \w is Unicode-aware by default
        words = re.findall(r'\w+', raw)
        imp_words = [x for x in words if x not in stopwords.words('english')]  # filter noise
        lmt = WordNetLemmatizer()
        # the original lemmatized the unfiltered list, which left imp_words unused
        words_new = [lmt.lemmatize(x) for x in imp_words]
        dw = list(set(words_new))

        for word in dw:
            for m in templist:
                p1 = wordnet.synsets(word)
                p2 = wordnet.synsets(m[0])
                if len(p1) > 0 and len(p2) > 0:
                    c = p1[0].wup_similarity(p2[0])
                    if c is not None and c > m[1]:  # wup_similarity can return None
                        m[1] = c
        # sort the themes by similarity and keep the ten closest
        tnew = sorted(templist, key=lambda val: val[1], reverse=True)[0:10]
        # remove the score column
        for l in tnew:
            del l[1]
        print('Done', key)
        bandLyricInfo[key] = tnew
    return bandLyricInfo
Author: sam8401, Project: pyMusicRecommender, Lines: 32, Source: mainScript.py

Example 3: relation

from nltk.corpus import wordnet as wn
from nltk.stem.porter import PorterStemmer

# The stemmer is defined elsewhere in the original module; a Porter stemmer
# is an assumption made here so the snippet is self-contained.
stemmer = PorterStemmer()

def relation(a, b):
    '''Given two words (strings), returns a number that denotes how related
    the two words are.

    Parameters
    ----------
    a : string
    b : string

    Returns
    -------
    float
        relatedness (at most 1) between the two strings

    Notes
    -----
    Expands both words breadth-first through WordNet (synonyms plus
    hyponyms). When the two frontiers meet after d expansion steps the
    function returns 1.0/d; if they have not met within the depth limit
    it returns 0.
    '''
    a = wn.synsets(a)
    b = wn.synsets(b)
    visited_a = set()
    visited_b = set()
    stemmed_a = set()
    stemmed_b = set()
    depth = 0
    while True:
        if depth > 2:
            return 0
        new_a = set()
        depth += 1
        for syn in a:
            # lemma_names() is a method in NLTK 3 (an attribute when this was written)
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_b:
                return 1.0 / depth
            if syn in visited_a:
                continue
            visited_a.add(syn)
            stemmed_a.add(stemmer.stem(syn.lemma_names()[0]))
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_a |= hyp
        a = new_a
        new_b = set()
        depth += 1
        for syn in b:
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_a:
                return 1.0 / depth
            if syn in visited_b:
                continue
            visited_b.add(syn)
            stemmed_b.add(stemmer.stem(syn.lemma_names()[0]))
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_b |= hyp
        b = new_b
Author: jamesosullivan, Project: Sentiment_Analysis, Lines: 60, Source: sentiment_analysis.py
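
With the imports and stemmer defined above, a quick check might look like this (no exact values are claimed, since results depend on the WordNet version):

print(relation('dog', 'cat'))      # 1/depth if the BFS frontiers meet within the limit
print(relation('dog', 'algebra'))  # almost certainly 0: no shallow WordNet link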

Example 4: tell

import math
import re

from nltk.corpus import stopwords as stopwords_corpus
from nltk.corpus import wordnet

# The snippet relies on a prebuilt stopword collection; building one from
# the NLTK corpus is an assumption made here.
stopwords = set(stopwords_corpus.words('english'))

def tell(para1, para2):
    # Strip anything that is not alphanumeric or a space
    para1 = re.sub(r'[^\w ]+', '', para1)
    para2 = re.sub(r'[^\w ]+', '', para2)

    para1 = para1.lower().split()
    para2 = para2.lower().split()

    if para1 == [] or para2 == []:
        return 0

    # Drop stopwords, unless doing so would leave nothing behind
    filtered1 = [t for t in para1 if t not in stopwords]
    if filtered1:
        para1 = filtered1
    filtered2 = [t for t in para2 if t not in stopwords]
    if filtered2:
        para2 = filtered2

    score = len(set(para1).intersection(para2))
    score_1 = float(score) / math.sqrt(len(para2) * len(para1))

    # Expand each paragraph with the lemma names of every synset of its words.
    # This produces duplicates; converting to a set below removes them.
    para1_with_dictionary = [l.name().lower() for word in para1 for s in wordnet.synsets(word) for l in s.lemmas()]
    para2_with_dictionary = [l.name().lower() for word in para2 for s in wordnet.synsets(word) for l in s.lemmas()]

    score1 = len(set(para1_with_dictionary).intersection(para2))
    score2 = len(set(para2_with_dictionary).intersection(para1))

    score_2 = float(max(score1, score2)) / min(len(para2), len(para1))

    score = (score_1 + score_2) / 2
    return score
Author: sanjitsbatra, Project: Sentiment_Mining_Twitter, Lines: 35, Source: similarity.py
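
A quick usage sketch (the returned scores are not exact claims; they depend on the WordNet and stopword data):

print(tell("The cat sat on the mat", "A feline was sitting on a rug"))  # some overlap, partly via synonyms
print(tell("stock prices fell sharply", "the weather is sunny today"))  # close to 0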

Example 5: parse_file

from nltk.corpus import wordnet as wn

def parse_file(f):
  for l in f.readlines():
    word = l.strip()

    if word in synonym_values:  # synonym_values is a module-level dict in the original script
      continue

    # get first-order synonyms
    synonyms = set()
    for synset in wn.synsets(word):
      synonyms |= set(synset.lemma_names())

    # add in synonyms of those synonyms (iterate a snapshot while extending the set)
    for syn in list(synonyms):
      for syn_synset in wn.synsets(syn):
        synonyms |= set(syn_synset.lemma_names())

    synonyms_with_values = synonyms & set(synonym_values.keys())

    if not synonyms_with_values:
      continue

    # mean sentiment value of the synonyms that already have one
    avg = sum(float(synonym_values[syn]) for syn in synonyms_with_values) / len(synonyms_with_values)

    # print("Adding", word, avg)
    synonym_values[word] = int(abs_ceil(avg))  # abs_ceil is defined elsewhere in the original script

  f.close()
Author: tomconroy, Project: extend-afinn, Lines: 34, Source: compile-sentiment.py

Example 6: scoreFile

import re

from nltk.corpus import wordnet

def scoreFile(filename, targetWords, verbose=False):
    meanScore = 0.0
    baseWordCount = 0
    wordCount = 0
    f = open(filename)  # file() in the original Python 2 code
    for l in f:
        wordScored = False
        fields = [x.strip().lower() for x in re.split(r"\s+", l)]
        if (targetWords is not None) and (fields[0] not in targetWords):
            continue
        baseSynsets = wordnet.synsets(fields[0])
        if not baseSynsets:  # synsets() returns a list, never None
            continue
        for word in fields[1:]:
            # Ignore the base word itself if it occurs
            if word == fields[0]:
                continue
            targetSynsets = wordnet.synsets(word)
            if not targetSynsets:
                continue
            # scoreWord is defined elsewhere in the original script
            wordScore = scoreWord(baseSynsets, targetSynsets)
            meanScore += wordScore
            wordCount += 1
            wordScored = True
        baseWordCount += 1 if wordScored else 0
        if verbose and baseWordCount > 0 and baseWordCount % 1000 == 0:
            print("Words scored : %d, Current Score : %f" % (
                baseWordCount,
                meanScore / (wordCount if wordCount > 0 else 1),
            ))
    f.close()
    meanScore /= wordCount if wordCount > 0 else 1
    return {"baseWordCount": baseWordCount, "totalWordCount": wordCount, "meanScore": meanScore}
Author: tgflynn, Project: NLP-Challenge, Lines: 34, Source: score.py

Example 7: hypernyms

    def hypernyms(self, word, question):
        # Despite its name, this collects up to 7 lemma names (synonyms)
        # for the word, restricted to its part of speech in the question.
        tag_to_pos = {'JJ': wn.ADJ, 'JJR': wn.ADJ, 'JJS': wn.ADJ,
                      'NN': wn.NOUN, 'NNS': wn.NOUN,
                      'VB': wn.VERB, 'VBG': wn.VERB, 'VBD': wn.VERB,
                      'VBN': wn.VERB, 'VBP': wn.VERB, 'VBZ': wn.VERB,
                      'RB': wn.ADV, 'RBR': wn.ADV, 'RBS': wn.ADV}
        hyper = []
        sentence = self.parse(question)
        pos = ''
        for sent, tag in sentence[0]:
            if sent == word:
                pos = tag
                break
        # The original repeated the same loop once per POS branch; a tag map
        # collapses the four branches without changing behaviour.
        if pos in tag_to_pos:
            for synset in wn.synsets(word, pos=tag_to_pos[pos]):
                for lemma in synset.lemmas():
                    if lemma.name() not in hyper and len(hyper) < 7:
                        hyper.append(lemma.name())
        return hyper
Author: Saurav-95, Project: BrainX, Lines: 29, Source: qp.py

Example 8: c_wn_max_path_similarity

def c_wn_max_path_similarity(score, word_from, word_to):
    """
    WordNet path similarity for the most similar synsets (1 if same word).

    This feature can be precomputed by EQUALS.
    """

    # Enforce returning 1 when the words are equal (it would be 0 if no
    # synset were found).
    # NOTE: since EQUALS precomputes this feature, the assignment in the
    #       second if is redundant. It is maintained to keep independence
    #       from the implementation of EQUALS.
    if not score.is_feature_set[score.EQUALS]:
        c_equals(score, word_from, word_to)
    if score.features[score.EQUALS] == 1:
        score.set_feature(score.WN_MAX_PATH_SIMILARITY, 1)
        return

    # Compute the actual similarity, keeping the maximum over all synset pairs
    _r = 0

    for ss_from in wn.synsets(word_from.text):
        for ss_to in wn.synsets(word_to.text):
            current_similarity = ss_to.path_similarity(ss_from)
            # path_similarity returns None when there is no connecting path
            if current_similarity is not None and current_similarity > _r:
                _r = current_similarity

    score.set_feature(score.WN_MAX_PATH_SIMILARITY, _r)
Author: dario-chiappetta, Project: TDM-lu, Lines: 27, Source: word.py
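
For reference, a standalone version of the core loop (path_similarity can return None, for instance between different parts of speech, which is why the guard above matters):

from nltk.corpus import wordnet as wn

best = 0
for ss_from in wn.synsets('car'):
    for ss_to in wn.synsets('drive'):
        sim = ss_to.path_similarity(ss_from)
        if sim is not None and sim > best:
            best = sim
print(best)  # highest path similarity over all 'car'/'drive' synset pairs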

Example 9: CollectSemcorSupersenses

import collections

from nltk.corpus import semcor
from nltk.corpus import wordnet as wn

def CollectSemcorSupersenses():
  # WordSupersenses is a helper class from the surrounding module
  oracle_matrix = collections.defaultdict(WordSupersenses)
  for sent in semcor.tagged_sents(tag='both'):
    for chk in sent:
      # .node in the original (pre-NLTK-3 tree API); newer NLTK returns the
      # label via .label(), and it may be a Lemma object rather than a string
      label = str(chk.label())
      if label and len(label) > 3 and label[-3] == '.' and label[-2:].isdigit():
        if chk[0].label().startswith('N'):
          pos = "n"
        elif chk[0].label().startswith('V'):
          pos = "v"
        else:
          continue
        lemmas = label[:-3]
        wnsn = int(label[-2:])
        ssets = wn.synsets(lemmas, pos)
        sorted_ssets = sorted(ssets, key=lambda x: x.name())
        filtered_ssets = None
        for lemma in lemmas.split("_"):
          if not filtered_ssets:
            filtered_ssets = [x for x in sorted_ssets if lemma in x.name()]
        if filtered_ssets:
          sorted_ssets = filtered_ssets
        try:
          supersense = sorted_ssets[wnsn-1].lexname()  # e.g. 'noun.group'
        except IndexError:
          continue
        for lemma in lemmas.split("_"):
          ssets = wn.synsets(lemma, pos)
          if len(ssets) > 0:
            if lemma.isdigit():
              lemma = "0"
            oracle_matrix[lemma].Add(supersense, "semcor")
  return oracle_matrix
Author: wammar, Project: multilingual-embeddings-eval-portal, Lines: 33, Source: supersense_matrix.py

Example 10: generatesynsets

import re

from nltk.corpus import wordnet as wns

def generatesynsets(table):
    table2 = []
    table3 = {}
    for i in table:
        # i is a (word, POS-tag) pair; look up noun or verb synsets accordingly
        if re.findall(r"N.*", i[1]):
            x = wns.synsets(i[0], pos=wns.NOUN)
        elif re.findall(r"V.*", i[1]):
            x = wns.synsets(i[0], pos=wns.VERB)
        else:
            continue  # the original left x unbound for any other tag
        for synset in x:
            for y in synset.lemma_names():
                if y not in ['match', 'be', 'in', 'is']:
                    table2.append((y, 'SYN'))
    # count occurrences of each (lemma, 'SYN') pair; the original did this
    # with a try/except around a constant increment of 1
    for i in table2:
        table3[i] = table3.get(i, 0) + 1

    return table3
Author: Jigar54, Project: Cricinfo-Query-Responder-NLP, Lines: 25, Source: phase2-part3.py

Example 11: polar_values

    # nltk, re, WordPunctTokenizer and wordnet are imported at module level
    # in the original source.
    def polar_values(self, positive_seeds, negative_seeds):
        self.values = []
        POS_tags = list(set(nltk.pos_tag(WordPunctTokenizer().tokenize(self.data))))
        words = []
        for (w, s) in POS_tags:
            w = w.lower()
            POS = self.get_wordnet_pos(s)
            if POS == '' or re.match(r"^\w+$", w) is None:
                words.append('0')  # placeholder for tokens that cannot be scored
            else:
                # build a synset-style name such as 'good.a.01'
                w += "." + POS
                w += ".01"
                words.append(w)
        negative_set = []
        for nw in negative_seeds:
            for s in wordnet.synsets(nw):
                negative_set.append(s)

        positive_set = []
        for pw in positive_seeds:
            for s in wordnet.synsets(pw):
                positive_set.append(s)

        self.eval_words(words, positive_set, negative_set)
        return self.values
Author: sherifEwis, Project: polar_analyzer, Lines: 25, Source: polar_analyzer.py

Example 12: getSynonym

def getSynonym(word, tag):
    # Map Penn Treebank tags to WordNet POS constants; the original used an
    # intermediate string ("ADJ", "NOUN", ...) and four identical branches.
    pos_map = {"JJ": wn.ADJ, "JJR": wn.ADJ, "JJS": wn.ADJ,
               "NN": wn.NOUN, "NNS": wn.NOUN, "NPS": wn.NOUN, "NP": wn.NOUN,
               "RB": wn.ADV, "RBR": wn.ADV, "RBS": wn.ADV,
               "VB": wn.VERB, "VBD": wn.VERB, "VBG": wn.VERB,
               "VBN": wn.VERB, "VBP": wn.VERB, "VBZ": wn.VERB}
    synonyms = {}  # a dict used as an ordered set, as in the original
    if tag in pos_map:
        for synset in wn.synsets(word, pos=pos_map[tag]):
            for name in synset.lemma_names():  # lemma_names is a method in NLTK 3
                synonyms[name] = name
    return list(synonyms.keys())
Author: pulishahu, Project: nlp, Lines: 32, Source: test.py
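
An example call (the exact synonyms returned depend on the WordNet version):

print(getSynonym('quick', 'JJ'))
# e.g. ['quick', 'speedy', 'flying', 'fast', 'agile', 'nimble', ...]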

Example 13: xhyper

from nltk.corpus import wordnet

def xhyper(words) -> [str]:
    '''Returns the highest-order x hypernyms.

    UI.request_x, first_depth, valueAt and pos_redef are helpers defined
    elsewhere in the original project.'''
    x = UI.request_x()
    print("\nNote: this program will use the first parallel synset if there are any")
    print("\nGathering data...")
    result = [x]  # the requested count is stored as the first element
    hyp = lambda w: w.hypernyms()
    # This would pick up the deepest branch's depth -> valueAt returns None -> returns None
    # depth = lambda L: isinstance(L, list) and max(map(depth, L)) + 1
    for i in range(len(words)):
        synsets = wordnet.synsets(words[i])
        if len(synsets) > 0:
            for s in range(len(synsets)):
                hyper = synsets[s].tree(hyp)  # the original re-queried wordnet.synsets here
                # adjectives, satellite adjectives and adverbs have no hypernym tree
                if hyper[0].pos() in ['a', 's', 'r']:
                    result.append([words[i], 'None', 'None', [None]])
                    continue
                d = first_depth(hyper) - 1
                xhyper = []
                for j in range(x):
                    xhyper.append(valueAt(d - j, hyper))
                    if xhyper[-1] is None:
                        break
                result.append([words[i], pos_redef(hyper[0].pos()), hyper[0], xhyper])
        else:
            result.append([words[i], 'None', 'None', [None]])
    return result
Author: Malthanatos, Project: Bifrost, Lines: 27, Source: controller.py

Example 14: userEnteredWordSensor

from nltk.corpus import wordnet as wn

# The original fragment is not valid Python; the version below restores the
# syntax while keeping its structure. cursor, session, exactly_right,
# nearly_right and perform all come from the surrounding module.
def userEnteredWordSensor(user_input):
    # Which stage are we currently in (AS, IM or WI), and what did the user enter?
    if exactly_right:
        # Save the full action plan, log the path, then proceed to the next word.
        cursor.executeQuery("insert into path values('', {}, {})".format(session['uid'], session['wordid']))
        pathid = cursor.executeQuery("select pathid from path where wordid = {}".format(session['wordid']))
        cursor.executeQuery("insert into waypoint values('', {}, {}, {})".format(pathid, session['type'], session['waypoint_info']))
        perform()
    elif nearly_right:
        # "Nearly right" means the input matches one of the word's tags,
        # a synonym of the word, or a synonym of a tag.
        wid = cursor.executeQuery("select wordid from words where word like '{}'".format(session['word']))
        tags = cursor.executeQuery("select tags from words where wordid = {}".format(wid))
        for tag in tags:
            if tag == user_input:  # the original compared against an undefined name 'word'
                # perform action sequence for NEXT
                break
            # a synonym of the word itself? (the original compared Synset objects
            # directly to the input string, which can never be equal)
            for s in wn.synsets(session['word']):
                if user_input in s.lemma_names():
                    # perform action sequence for NEXT
                    break
            else:
                # a synonym of the tag?
                for s in wn.synsets(tag):
                    if user_input in s.lemma_names():
                        # perform action sequence for NEXT
                        break
Author: ClaudiaWinchester, Project: dumb-charades-ai, Lines: 32, Source: sensors.py

Example 15: wndist

from math import ceil

import nltk
from nltk.corpus import wordnet as wn

def wndist(fs):
    """
    Distance between NP1 and NP2 in WordNet (using the first sense only).
    __get_pos__ and the fields of fs come from the surrounding feature module.
    """

    wndist = -100000  # sentinel for "not comparable"

    i_pos = __get_pos__(fs.article, fs.sentence, fs.offset_begin, fs.offset_end)
    j_pos = __get_pos__(fs.article, fs.sentence_ref, fs.offset_begin_ref, fs.offset_end_ref)

    # consider only common nouns
    if i_pos.startswith('NN') and j_pos.startswith('NN') and not i_pos.endswith('P') and not j_pos.endswith('P'):
        lemmatizer = nltk.WordNetLemmatizer()
        i = lemmatizer.lemmatize(fs.i_cleaned, pos='n')
        j = lemmatizer.lemmatize(fs.j_cleaned, pos='n')
        synsets_i = wn.synsets(i)
        synsets_j = wn.synsets(j)
        if len(synsets_i) > 0 and len(synsets_j) > 0:
            wn_sense1_i = synsets_i[0]
            wn_sense1_j = synsets_j[0]
            # lch_similarity is only defined within a single part of speech;
            # the original extracted the POS by string-splitting str(synset)
            if wn_sense1_i.pos() == wn_sense1_j.pos():
                wndist = wn_sense1_i.lch_similarity(wn_sense1_j)
                wndist = ceil(wndist * 100) / 100.0  # round up to two decimals

    return "wndist={}".format(wndist)
Author: pkarmstr, Project: coreference-project, Lines: 33, Source: feature_functions.py


Note: the nltk.corpus.wordnet.synsets examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. For distribution and use, please consult the corresponding project's license. Do not reproduce without permission.