Python wordnet.synset Function Code Examples

This article collects typical usage examples of the synset function from nltk.corpus.wordnet in Python. If you have been wondering exactly what the synset function does, how to call it, or where to see it in action, the curated examples below should help.


Below are 15 code examples of the synset function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
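Before working through the examples, here is a minimal sketch of the core API (an illustrative snippet written for this article, not taken from any of the projects below; it assumes the WordNet data has been downloaded, e.g. via nltk.download('wordnet')):

from nltk.corpus import wordnet as wn

dog = wn.synset('dog.n.01')                # look up one sense by 'lemma.pos.number'
print(dog.definition())                    # the gloss of this sense
print([l.name() for l in dog.lemmas()])    # synonyms grouped in this synset
cat = wn.synset('cat.n.01')
print(dog.path_similarity(cat))            # path-based similarity in (0, 1]
print(dog.wup_similarity(cat))             # Wu-Palmer similarity

Note that wn.synset (singular) expects an exact sense name and raises WordNetError when it does not exist, whereas wn.synsets (plural) takes a surface word and returns a (possibly empty) list of senses. Most examples below combine the two, and most of them assume the import shown above.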

Example 1: process_verb

import re

from nltk.corpus import wordnet as wn
from nltk.corpus.reader.wordnet import WordNetError

def process_verb(verb):
    verb = verb[:-1]  # remove trailing newline char
    with open('youtube_setof_verbs.txt') as f:
        verb_dict = f.read().split('\n')

    max_score = 0
    finl_verb = (verb, '<>')
    # split the CamelCase input into candidate verbs
    verb_list = re.findall('[A-Z][^A-Z]*', verb)

    for prob_verb in verb_list:
        if prob_verb.endswith('ing'):
            prob_verb = prob_verb[:-3]  # strip the 'ing' suffix
            if prob_verb.lower() == 'cutt':
                prob_verb = 'cut'
        if wn.synsets(prob_verb):
            try:
                v1 = wn.synset(prob_verb + '.v.01')
                for yout_verb in verb_dict:
                    if yout_verb != '':
                        v2 = wn.synset(yout_verb + '.v.01')
                        score = v1.wup_similarity(v2)
                        # wup_similarity returns None for incomparable senses
                        if score is not None and score > max_score:
                            finl_verb = (prob_verb, yout_verb)
                            max_score = score
            except WordNetError:
                finl_verb = (prob_verb, '<>')

    return finl_verb[1]
Developer: sumitb, Project: YouTube2Action, Lines: 32, Source: word.py

Example 2: process_subj

def process_subj(subj, flag):
    if flag == 1:
        with open('youtube_setof_subjects.txt') as f:
            subj_dict = f.read().split('\n')
    elif flag == 2:
        with open('youtube_setof_objects.txt') as f:
            subj_dict = f.read().split('\n')
    else:
        subj_dict = []

    max_score = 0
    finl_subj = (subj, '<>')
    subj_list = subj.split(',')

    if len(subj_list) == 1:
        return subj
    for prob_subj in subj_list:
        prob_subj = prob_subj.strip()
        if wn.synsets(prob_subj):
            try:
                v1 = wn.synset(prob_subj + '.n.01')
                for yout_subj in subj_dict:
                    if yout_subj != '':
                        v2 = wn.synset(yout_subj + '.n.01')
                        score = v1.wup_similarity(v2)
                        # wup_similarity returns None for incomparable senses
                        if score is not None and score > max_score:
                            finl_subj = (prob_subj, yout_subj)
                            max_score = score
            except WordNetError:
                finl_subj = (prob_subj, '<>')

    return finl_subj[1]
Developer: sumitb, Project: YouTube2Action, Lines: 34, Source: word.py

Example 3: preprocess_docs

def preprocess_docs():
    stopwords = nltk.corpus.stopwords.words('english')
    corpus = list(filtered_corpus())  # filtered_corpus() is defined elsewhere in the project
    counter = 0
    for train, topic, title, text in corpus:
        if counter % 10 == 0:
            print("%.2f %%\r" % (counter * 100.0 / len(corpus),), end='')
            sys.stdout.flush()
        counter += 1
        # tokenize the title and drop stopwords
        text = [i for i in nltk.word_tokenize(title) if i.lower() not in stopwords]
        buf = []
        for word in text:
            synsets = wn.synsets(word)
            grain = []
            wheat = []
            for s in synsets:
                # path similarity of each sense to the two target concepts
                grain.append(s.path_similarity(wn.synset('grain.n.08')))
                wheat.append(s.path_similarity(wn.synset('wheat.n.02')))

            # path_similarity returns None for incomparable senses
            grain = [i for i in grain if i is not None]
            wheat = [i for i in wheat if i is not None]

            grain = sum(grain) / len(grain) if grain else 0
            wheat = sum(wheat) / len(wheat) if wheat else 0
            buf.append((word, grain, wheat))
        yield train, topic, buf
    print("")
Developer: Sentimentron, Project: CS909-Excercise8, Lines: 33, Source: pre713.py

Example 4: get_score

def get_score(tags, groups):
    sscore = 0
    scount = 0
    illegal_word = 0

    if tags is not None:
        for g in groups:
            for x in tags:
                try:
                    # if the group name is a substring of the tag, award a
                    # fixed bonus; otherwise fall back to WordNet path
                    # similarity between the two nouns
                    if g in str(x.text).lower():
                        sscore += 2.0
                        scount += 1
                    else:
                        tag = wn.synset(str(x.text).lower() + '.n.01')
                        group = wn.synset(g + '.n.01')
                        sem = wn.path_similarity(group, tag)
                        if sem is not None and sem >= 0.3:
                            sscore += sem
                            scount += 1
                except Exception:
                    illegal_word += 1
    if scount != 0:
        return sscore / scount
    return 0
Developer: tushar19, Project: Web-Image-Ranking-Retrieval, Lines: 28, Source: imgsearch.py

Example 5: getSenseSimilarity

def getSenseSimilarity(worda, wordb):
    """Find the similarity between the word senses of two words."""
    wordasynsets = wn.synsets(worda)
    wordbsynsets = wn.synsets(wordb)
    synsetnamea = [wn.synset(syns.name()) for syns in wordasynsets]
    synsetnameb = [wn.synset(syns.name()) for syns in wordbsynsets]

    # compare every sense of worda with every sense of wordb
    for sseta, ssetb in [(sseta, ssetb) for sseta in synsetnamea for ssetb in synsetnameb]:
        pathsim = sseta.path_similarity(ssetb)
        wupsim = sseta.wup_similarity(ssetb)
        if pathsim is not None:
            print("Path Sim Score:", pathsim, "WUP Sim Score:", wupsim,
                  "\t", sseta.definition(), "\t", ssetb.definition())
Developer: dxd132630, Project: NeoPythonic, Lines: 27, Source: similarity.py
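A hypothetical call to the function above (the exact lines printed depend on the installed WordNet version); it prints one line for every sense pair whose path similarity is defined:

getSenseSimilarity('car', 'automobile')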

Example 6: probability

def probability(tokens, category, dictionary, total):
    if category == "sense":
        total_score = 0
        dic = dictionary
        if len(tokens) == 0:
            return 0
        for token in tokens:
            for dict_sense in dic:
                # token and dict_sense are synset names such as 'dog.n.01'
                score = wn.synset(token).path_similarity(wn.synset(dict_sense))
                if score is not None:
                    total_score += score * dic[dict_sense]
        return total_score / len(tokens)
    else:
        p = 0
        dic = dictionary
        total_instances = total
        for token in tokens:
            token_prob = dic[token] if token in dic else 0
            p += token_prob / float(total_instances)
        return p
Developer: aiqiliu, Project: AskReddit-analytics, Lines: 26, Source: titleAnalysis.py

Example 7: get_similar_words

def get_similar_words(word):
    lemmas_noun, hypernyms_noun, lemmas_verb, hypernyms_verb = [], [], [], []
    try:
        lemmas_noun = [str(lemma.name()) for lemma in wn.synset(word + '.n.01').lemmas()]
    except WordNetError:
        pass

    try:
        hypernyms_noun = [str(syn.name()).split('.')[0] for syn in wn.synset(word + '.n.01').hypernyms()]
    except WordNetError:
        pass

    if len(lemmas_noun) == 0 and len(hypernyms_noun) == 0:
        # only try verbs if there are no similar nouns
        try:
            lemmas_verb = [str(lemma.name()) for lemma in wn.synset(word + '.v.01').lemmas()]
        except WordNetError:
            pass

        try:
            hypernyms_verb = [str(syn.name()).split('.')[0] for syn in wn.synset(word + '.v.01').hypernyms()]
        except WordNetError:
            pass

    similar_words = lemmas_noun + hypernyms_noun + lemmas_verb + hypernyms_verb
    # Filter out words that are not purely alphabetic (WordNet lemmas may
    # contain underscores, e.g. "domestic_animal"; handling those would
    # require a 2-gram search, which is not implemented here).
    pattern = re.compile('^[a-zA-Z]+$')
    return [x for x in similar_words if pattern.match(x) and x != word]
Developer: seowyanyi, Project: cs3245-4, Lines: 32, Source: helper.py
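As a hypothetical usage of the function above (with the standard WordNet data, and assuming the imports from Example 1):

print(get_similar_words('dog'))  # expected to yield single-word relatives such as ['canine']

Multi-word hypernyms like 'domestic_animal' are dropped by the alphabetic filter, and the query word itself is excluded.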

Example 8: expand_queries

def expand_queries(file):
    '''
    For each term in a query, take the first synset of the word from WordNet
    and append all synonyms of that synset to the query.
    '''
    file = open(file)
    for sentence in file:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>') + 1: sentence.rfind('<')]
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
                if word not in stopwords.words('english'):
                    looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                    synsets = wn.synsets(word)
                    if looking_for in str(synsets):
                        new_words = wn.synset(looking_for).lemma_names()
                        for new_word in new_words:
                            if new_word.lower() != word.lower():
                                full_q = full_q + ' ' + str(new_word)
                    else:
                        # fall back to the morphological root of the word
                        if wn.morphy(word) is not None:
                            word = wn.morphy(word)
                            looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                            synsets = wn.synsets(word)
                            if looking_for in str(synsets):
                                new_words = wn.synset(looking_for).lemma_names()
                                for new_word in new_words:
                                    if new_word.lower() != word.lower():
                                        full_q = full_q + ' ' + str(new_word)
            print(query + ' ' + full_q)
Developer: britth, Project: inls890-microblog, Lines: 33, Source: wordnetExpansion.py

Example 9: get_similarity

    def get_similarity(self, word1, word2):
        '''Compute word similarity using the WordNet lexical database.'''
        # reduce each word to a base form known to WordNet, if any
        if wn.morphy(word1.lower()) is not None:
            word1 = wn.morphy(word1.lower())
        if wn.morphy(word2.lower()) is not None:
            word2 = wn.morphy(word2.lower())
        word1_synsets = wn.synsets(word1)
        word2_synsets = wn.synsets(word2)
        sim = 0

        # take the maximum path similarity over all sense pairs
        for syn1 in word1_synsets:
            w1 = wn.synset(syn1.name())
            for syn2 in word2_synsets:
                w2 = wn.synset(syn2.name())
                tmp = w1.path_similarity(w2)
                if tmp is not None and tmp > sim:
                    sim = tmp
        return sim
Developer: cnspica, Project: ASExtractor, Lines: 28, Source: EnKeywordExtraction.py

Example 10: print_other_lexical_rel

def print_other_lexical_rel():
    good1 = wn.synset('good.a.01')
    print("Antonyms of 'good': " + str(good1.lemmas()[0].antonyms()))
    print("")
    print("Entailment of 'walk': " + str(wn.synset('walk.v.01').entailments()))
    print("")
Developer: anirudhcoder, Project: Natural-Language-Processing, Lines: 7, Source: hw2-part2-wordnet-examples.py

Example 11: overlapCount

    def overlapCount(self, sentence):
        # start the count at one so we can still guess when no sentence overlaps
        count = 1

        sWiki = TextBlob(self.arrayToString(sentence))
        sVerbs = self.getVerbs(sWiki)

        # compare verbs using WordNet's similarity score;
        # identical verbs score 1
        for sverb in sVerbs:
            synv = wn.synset(sverb + '.v.01')
            for qverb in self.questionVerbs:
                synq = wn.synset(qverb + '.v.01')
                sim = synv.path_similarity(synq)
                if sim is not None:
                    count += sim

        # remove stop words from the sentence AFTER we've gotten POS tags
        s = self.removeStopWords(sentence)
        sLower = self.removeStopWords(sentence.lower())

        for word in self.qList:
            if word in s:
                count += 1
            elif word.lower() in sLower:
                count += 0.1
        return count
Developer: FlyingGroundhogs, Project: QASystem, Lines: 26, Source: VOverlap.py

Example 12: compare

    def compare(self, word1, word2):
        # compare the first (most frequent) sense of each word
        tmp1 = wn.synsets(word1)[0].name()
        tmp2 = wn.synsets(word2)[0].name()
        w1 = wn.synset(tmp1)
        w2 = wn.synset(tmp2)
        val = w1.wup_similarity(w2)
        return val
Developer: danjamker, Project: N-Fly, Lines: 7, Source: WordNet.py

Example 13: is_ingredient

def is_ingredient(word):
    """
    Return True if the word is an ingredient, False otherwise.

    >>> is_ingredient('milk')
    True
    >>> is_ingredient('blackberries')
    True
    >>> is_ingredient('Canada')
    False
    >>> is_ingredient('breakfast')
    False
    >>> is_ingredient('dish')
    False
    """
    reject_synsets = ['meal.n.01', 'meal.n.02', 'dish.n.02', 'vitamin.n.01']
    reject_synsets = set(wordnet.synset(w) for w in reject_synsets)
    accept_synsets = ['food.n.01', 'food.n.02']
    accept_synsets = set(wordnet.synset(w) for w in accept_synsets)
    for word_synset in wordnet.synsets(word, wordnet.NOUN):
        all_synsets = set(word_synset.closure(lambda s: s.hypernyms()))
        all_synsets.add(word_synset)
        for synset in reject_synsets:
            if synset in all_synsets:
                return False
        for synset in accept_synsets:
            if synset in all_synsets:
                return True
    return word in wordlists.ingredients
Developer: JoshRosen, Project: cmps140_creative_cooking_assistant, Lines: 29, Source: ingredients.py

Example 14: ontoList

    def ontoList(self, synset):
        # properties to pick from
        ln = wn.synset(synset).lexname().split('.')[1]
        hyper = self.lemmatize(self.getHypernyms(synset))
        definition = self.getDefinition(synset)
        lemmas = self.lemmatize(self.getLemmas(synset))
        examples = self.getExamples(synset)
        hypo = self.lemmatize(self.getHyponyms(synset))
        if self.pos == 'v':
            # verbs additionally contribute their frame strings
            strings = [string.replace("_", " ") for string in self.getFrameStrings(synset)]
            ontologyList = [strings, ln, lemmas, examples, hypo, definition, hyper]
        else:
            ontologyList = [ln, lemmas, examples, hypo, definition, hyper]

        # keep only the non-empty entries
        return [o for o in ontologyList if o]
Developer: aankit, Project: centrality, Lines: 25, Source: wnQuery_dev.py

Example 15: calculate_and_write_edge_weigthings_for_synsets

def calculate_and_write_edge_weigthings_for_synsets(synset_filenames_dict, file_name):
    max_co_occurrence = calculate_max_co_occurrence(synset_filenames_dict)
    edge_weigthings_for_synsets = dict()
    how_many_added = 0
    how_many_done = 0
    how_many_to_do = len(synset_filenames_dict) * (len(synset_filenames_dict) - 1)
    write_edge_weightings_to_file(dict(), file_name)

    for synset1, filenames1 in synset_filenames_dict.items():
        for synset2, filenames2 in synset_filenames_dict.items():
            if synset1 < synset2:
                how_many_done += 1
                similarity = wn.synset(synset1).lch_similarity(wn.synset(synset2))
                co_occurrence = len(set(synset_filenames_dict[synset1]).intersection(set(synset_filenames_dict[synset2])))
                normalized_co_occurrence = co_occurrence / float(max_co_occurrence)
                # threshold both signals so that weak edges drop to zero
                if similarity is None or similarity < 2.0:
                    similarity = 0
                if normalized_co_occurrence < 0.4:
                    normalized_co_occurrence = 0
                edge_weighting = similarity + 4 * normalized_co_occurrence
                if edge_weighting != 0:
                    edge_weigthings_for_synsets[(synset1, synset2)] = edge_weighting
                    how_many_added += 1
                # flush to disk in batches of roughly 1000 edges
                if how_many_added > 1000:
                    print_status("Done with " + str(how_many_done) + " of " + str(how_many_to_do) + "\n")
                    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
                    edge_weigthings_for_synsets = dict()
                    how_many_added = 0
    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
Developer: nicolas-fricke, Project: semmul2013-group1, Lines: 30, Source: mcl_keyword_clustering.py


Note: The nltk.corpus.wordnet.synset examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. For distribution and use, consult the license of the corresponding project; do not reproduce without permission.