This article collects typical usage examples of the nltk.corpus.wordnet.synsets function in Python. If you have been wondering how exactly synsets is used, the curated examples below may help. Fifteen code examples of the synsets function are shown, sorted by popularity by default.
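Before the examples, a minimal sketch of the basic call may be helpful (it assumes the WordNet data has been installed, e.g. via nltk.download('wordnet')): synsets() returns a list of Synset objects for a word, optionally restricted to one part of speech, and an empty list for unknown words.

from nltk.corpus import wordnet as wn

print(wn.synsets('dog'))               # [Synset('dog.n.01'), Synset('frump.n.01'), ...]
print(wn.synsets('dog', pos=wn.VERB))  # verb senses only, e.g. [Synset('chase.v.01')]
print(wn.synsets('no_such_word'))      # [] -- unknown words yield an empty list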
Example 1: findSimilarity
# assumes: from nltk.corpus import wordnet as wn
def findSimilarity(self):
    # Only one item is recommended: recoItems[0] is the recommended item and
    # recoItems[1] is the list of items over which it was preferred. We take
    # the exact synsets of the recommended item, but only 10 synsets per
    # alternative to keep the computation cheap.
    recommendation = wn.synsets(self.recoItems[0])
    recommendationFiltered = []
    for eachSyn in recommendation:
        if self.recoItems[0] in str(eachSyn):
            recommendationFiltered.append(eachSyn)
    choices = {}
    for eachItem in self.recoItems[1]:
        choices[eachItem] = wn.synsets(eachItem)[:10]  # first 10 synsets only
    choiceScores = {}
    for key, value in choices.items():
        choiceScores[key] = []
        for eachValue in value:
            for eachRecoSyn in recommendationFiltered:
                choiceScores[key].append(eachRecoSyn.path_similarity(eachValue))
    maxChoiceScores = {}
    for eachKey in choiceScores:
        # path_similarity() returns None for unconnected synsets; drop those
        scores = [s for s in choiceScores[eachKey] if s is not None]
        maxChoiceScores[eachKey] = max(scores) if scores else 0
    return maxChoiceScores
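Examples 1 and 8 both rank candidates with Synset.path_similarity(), which returns a score in (0, 1] based on the shortest hypernym/hyponym path between two synsets, or None when no such path exists. A minimal sketch (the printed values are indicative):

from nltk.corpus import wordnet as wn

dog = wn.synsets('dog')[0]            # Synset('dog.n.01')
cat = wn.synsets('cat')[0]            # Synset('cat.n.01')
print(dog.path_similarity(cat))       # 0.2 -- four edges apart, 1 / (4 + 1)
run = wn.synsets('run', pos=wn.VERB)[0]
print(dog.path_similarity(run))       # may be None: noun and verb hierarchies are disjoint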
Example 2: parseLyrics2
# assumes: import copy, re
#          from nltk.corpus import stopwords, wordnet
#          from nltk.stem import WordNetLemmatizer
def parseLyrics2(outlist):
    bandLyricInfo = {}
    master = [['death', 0], ['violence', 0], ['sacrifice', 0], ['nature', 0],
              ['peace', 0], ['storm', 0], ['spirit', 0], ['dark', 0],
              ['scream', 0], ['pain', 0], ['blood', 0], ['flesh', 0],
              ['love', 0], ['greed', 0], ['poison', 0], ['anger', 0],
              ['revenge', 0], ['misery', 0], ['hell', 0], ['heaven', 0],
              ['hate', 0], ['soul', 0], ['battle', 0], ['ghost', 0],
              ['joy', 0], ['light', 0], ['omen', 0], ['miracle', 0],
              ['magic', 0], ['universe', 0], ['disease', 0], ['god', 0],
              ['satan', 0], ['struggle', 0], ['heart', 0]]
    for key in outlist:
        templist = copy.deepcopy(master)
        raw = outlist[key].lower()
        words = re.findall(r'\w+', raw, flags=re.UNICODE)  # drop punctuation
        imp_words = [x for x in words if x not in stopwords.words('english')]  # filter noise
        lmt = WordNetLemmatizer()
        words_new = [lmt.lemmatize(x) for x in imp_words]
        dw = list(set(words_new))
        # score each distinct word against every theme word in master
        for word in dw:
            for m in templist:
                p1 = wordnet.synsets(word)
                p2 = wordnet.synsets(m[0])
                if len(p1) > 0 and len(p2) > 0:
                    c = p1[0].wup_similarity(p2[0])
                    if c is not None and c > m[1]:
                        m[1] = c
        # sort theme words by similarity, keep the top 10, drop the scores
        tnew = sorted(templist, key=lambda val: val[1], reverse=True)[0:10]
        for l in tnew:
            del l[1]
        print('Done', key)
        bandLyricInfo[key] = tnew
    return bandLyricInfo
Example 3: relation
# assumes: from nltk.corpus import wordnet as wn
#          from nltk.stem.porter import PorterStemmer
#          stemmer = PorterStemmer()   (a module-level stemmer)
def relation(a, b):
    '''Given two words (strings), returns a number that denotes how related
    the two words are.

    Parameters
    ----------
    a : string
    b : string

    Returns
    -------
    float
        relation (less than 1) between the two strings

    Notes
    -----
    Runs a breadth-first search over the NLTK WordNet from both words,
    expanding hyponyms and lemma synsets, and returns 1.0/depth at the
    depth where the two searches meet; returns 0 if they have not met
    by the depth limit.
    '''
    a = wn.synsets(a)
    b = wn.synsets(b)
    visited_a = set([])
    visited_b = set([])
    stemmed_a = set([])
    stemmed_b = set([])
    depth = 0
    while True:
        if depth > 2:
            return 0
        new_a = set([])
        depth += 1
        for syn in a:
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_b:
                return 1.0 / depth
            if syn in visited_a:
                continue
            visited_a.add(syn)
            stemmed_a.add(stemmer.stem(syn.lemma_names()[0]))
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_a |= hyp
        a = new_a
        new_b = set([])
        depth += 1
        for syn in b:
            if stemmer.stem(syn.lemma_names()[0]) in stemmed_a:
                return 1.0 / depth
            if syn in visited_b:
                continue
            visited_b.add(syn)
            stemmed_b.add(stemmer.stem(syn.lemma_names()[0]))
            hyp = set(syn.hyponyms())
            for lemma in syn.lemma_names():
                hyp |= set(wn.synsets(lemma))
            new_b |= hyp
        b = new_b
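A quick usage sketch of relation() (it assumes the imports and the module-level stemmer noted above; the printed values are illustrative):

print(relation('dog', 'puppy'))   # e.g. 0.33... if the two searches meet at depth 3
print(relation('dog', 'quasar'))  # 0 when no link is found within the depth limit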
Example 4: tell
# assumes: import math, re
#          from functools import reduce
#          from nltk.corpus import wordnet
#          stopwords = a precomputed set of stopword strings
def tell(para1, para2):
    # strip everything except alphanumerics and spaces
    para1 = re.sub(r'[^\w ]+', '', para1)
    para2 = re.sub(r'[^\w ]+', '', para2)
    para1 = para1.lower().split()
    para2 = para2.lower().split()
    if para1 == [] or para2 == []:
        return 0
    # drop stopwords, but keep the original tokens if nothing would remain
    filtered1 = [t for t in para1 if t not in stopwords]
    if filtered1:
        para1 = filtered1
    filtered2 = [t for t in para2 if t not in stopwords]
    if filtered2:
        para2 = filtered2
    score = len(set(para1).intersection(para2))
    score_1 = float(score) / math.sqrt(len(para2) * len(para1))
    # expand each paragraph with the lemma names of every synset of its words;
    # duplicates are removed by the set conversions below
    para1_with_dictionary = reduce(lambda x, y: x + y,
        [[l.name() for s in wordnet.synsets(word) for l in s.lemmas()] for word in para1])
    para1_with_dictionary = [ele.lower() for ele in para1_with_dictionary]
    para2_with_dictionary = reduce(lambda x, y: x + y,
        [[l.name() for s in wordnet.synsets(word) for l in s.lemmas()] for word in para2])
    para2_with_dictionary = [ele.lower() for ele in para2_with_dictionary]
    score1 = len(set(para1_with_dictionary).intersection(para2))
    score2 = len(set(para2_with_dictionary).intersection(para1))
    score_2 = float(max(score1, score2)) / min(len(para2), len(para1))
    return (score_1 + score_2) / 2
Example 5: parse_file
# assumes: from nltk.corpus import wordnet as wn
#          synonym_values = a dict mapping known words to sentiment values
#          abs_ceil = a helper that rounds away from zero
def parse_file(f):
    for l in f.readlines():
        word = l.strip()
        synsets = wn.synsets(word)
        if word in synonym_values:
            continue
        # get first-order synonyms
        synonyms = set()
        for synset in synsets:
            synonyms |= set(synset.lemma_names())
        # add in the synonyms of those synonyms (iterate over a snapshot,
        # since the set is extended inside the loop)
        for syn in list(synonyms):
            for syn_synset in wn.synsets(syn):
                synonyms |= set(syn_synset.lemma_names())
        synonyms_with_values = synonyms & set(synonym_values.keys())
        if not synonyms_with_values:
            continue
        # running average of the values of all scored synonyms
        avg = 0
        total = 0
        for syn in synonyms_with_values:
            value = synonym_values[syn]
            avg = (avg * total + float(value)) / (total + 1)
            total += 1
        synonym_values[word] = int(abs_ceil(avg))
    f.close()
Example 6: scoreFile
# assumes: import re
#          from nltk.corpus import wordnet
#          scoreWord = a helper that scores two synset lists
def scoreFile(filename, targetWords, verbose=False):
    meanScore = 0.0
    baseWordCount = 0
    wordCount = 0
    f = open(filename)
    for l in f:
        wordScored = False
        fields = [x.strip().lower() for x in re.split(r"\s+", l)]
        if (targetWords is not None) and (fields[0] not in targetWords):
            continue
        baseSynsets = wordnet.synsets(fields[0])
        if not baseSynsets:   # synsets() returns an empty list, never None
            continue
        for word in fields[1:]:
            # ignore the base word itself if it occurs
            if word == fields[0]:
                continue
            targetSynsets = wordnet.synsets(word)
            if not targetSynsets:
                continue
            wordScore = scoreWord(baseSynsets, targetSynsets)
            meanScore += wordScore
            wordCount += 1
            wordScored = True
        baseWordCount += 1 if wordScored else 0
        if verbose:
            if (baseWordCount > 0) and (baseWordCount % 1000 == 0):
                print("Words scored : %d, Current Score : %f" % (
                    baseWordCount,
                    meanScore / (wordCount if wordCount > 0 else 1),
                ))
    f.close()
    meanScore /= wordCount if wordCount > 0 else 1
    return {"baseWordCount": baseWordCount, "totalWordCount": wordCount, "meanScore": meanScore}
Example 7: hypernyms
def hypernyms(self, word, question):
    # despite the name, this collects up to 7 lemma names (synonyms) from the
    # word's synsets, restricted to the word's POS tag in the question
    hyper = []
    sentence = self.parse(question)
    pos = ''
    for sent, tag in sentence[0]:
        if sent == word:
            pos = tag
            break
    if pos in ['JJ', 'JJR', 'JJS']:
        for synset in wn.synsets(word, pos=wn.ADJ):
            for lemma in synset.lemmas():
                if lemma.name() not in hyper and len(hyper) < 7:
                    hyper.append(lemma.name())
    elif pos in ['NN', 'NNS']:
        for synset in wn.synsets(word, pos=wn.NOUN):
            for lemma in synset.lemmas():
                if lemma.name() not in hyper and len(hyper) < 7:
                    hyper.append(lemma.name())
    elif pos in ['VB', 'VBG', 'VBD', 'VBN', 'VBP', 'VBZ']:
        for synset in wn.synsets(word, pos=wn.VERB):
            for lemma in synset.lemmas():
                if lemma.name() not in hyper and len(hyper) < 7:
                    hyper.append(lemma.name())
    elif pos in ['RB', 'RBR', 'RBS']:
        for synset in wn.synsets(word, pos=wn.ADV):
            for lemma in synset.lemmas():
                if lemma.name() not in hyper and len(hyper) < 7:
                    hyper.append(lemma.name())
    return hyper
Example 8: c_wn_max_path_similarity
# assumes: from nltk.corpus import wordnet as wn
#          c_equals = the feature function that precomputes EQUALS
def c_wn_max_path_similarity(score, word_from, word_to):
    """
    WordNet path similarity for the most similar pair of synsets
    (1 if the words are equal).
    This feature can be precomputed by EQUALS.
    """
    # Enforce returning 1 when the words are equal (it would be 0 if no
    # synset were found).
    # NOTE: since EQUALS precomputes this feature, the assignment in the
    # second if is redundant. It is maintained to stay independent of the
    # implementation of EQUALS.
    if not score.is_feature_set[score.EQUALS]:
        c_equals(score, word_from, word_to)
    if score.features[score.EQUALS] == 1:
        score.set_feature(score.WN_MAX_PATH_SIMILARITY, 1)
        return
    # Compute the actual maximum similarity over all synset pairs
    _r = 0
    for ss_from in wn.synsets(word_from.text):
        for ss_to in wn.synsets(word_to.text):
            current_similarity = ss_to.path_similarity(ss_from)
            # path_similarity() may return None for unconnected synsets
            if current_similarity is not None and current_similarity > _r:
                _r = current_similarity
    score.set_feature(score.WN_MAX_PATH_SIMILARITY, _r)
Example 9: CollectSemcorSupersenses
# assumes: import collections
#          from nltk.corpus import semcor, wordnet as wn
#          WordSupersenses = a class with an Add(supersense, source) method
# NOTE: this snippet targets the older NLTK SemCor API, where a chunk's tag
# is exposed as chk.node (a string like 'group.01'); in NLTK 3 the label is
# chk.label(), and Synset.name / Synset.lexname are methods.
def CollectSemcorSupersenses():
    oracle_matrix = collections.defaultdict(WordSupersenses)
    for sent in semcor.tagged_sents(tag='both'):
        for chk in sent:
            # keep only chunks whose tag ends in a sense number, e.g. 'group.01'
            if chk.node and len(chk.node) > 3 and chk.node[-3] == '.' and chk.node[-2:].isdigit():
                if chk[0].node.startswith('N'):
                    pos = "n"
                elif chk[0].node.startswith('V'):
                    pos = "v"
                else:
                    continue
                lemmas = chk.node[:-3]
                wnsn = int(chk.node[-2:])
                ssets = wn.synsets(lemmas, pos)
                sorted_ssets = sorted(ssets, key=lambda x: x.name)
                # prefer synsets whose name contains one of the chunk's lemmas
                filtered_ssets = None
                for lemma in lemmas.split("_"):
                    if not filtered_ssets:
                        filtered_ssets = [x for x in sorted_ssets if lemma in x.name]
                    if filtered_ssets:
                        sorted_ssets = filtered_ssets
                try:
                    supersense = sorted_ssets[wnsn - 1].lexname  # e.g. 'noun.group'
                except IndexError:
                    continue
                for lemma in lemmas.split("_"):
                    ssets = wn.synsets(lemma, pos)
                    if len(ssets) > 0:
                        if lemma.isdigit():
                            lemma = "0"
                        oracle_matrix[lemma].Add(supersense, "semcor")
    return oracle_matrix
Example 10: generatesynsets
# assumes: import re
#          from nltk.corpus import wordnet as wns
#          table = a list of (word, POS-tag) pairs
def generatesynsets(table):
    table2 = []
    table3 = {}
    for i in table:
        if re.findall(r"N.*", i[1]):
            x = wns.synsets(i[0], pos=wns.NOUN)
        elif re.findall(r"V.*", i[1]):
            x = wns.synsets(i[0], pos=wns.VERB)
        else:
            continue  # skip tags that are neither noun nor verb
        for z in range(len(x)):
            for y in x[z].lemma_names():
                if y not in ['match', 'be', 'in', 'is']:
                    table2.append((y, 'SYN'))
    # count how often each (lemma, 'SYN') pair was produced
    for i in table2:
        try:
            table3[i] += 1
        except KeyError:
            table3[i] = 1
    return table3
Example 11: polar_values
# assumes: import nltk, re
#          from nltk.corpus import wordnet
#          from nltk.tokenize import WordPunctTokenizer
def polar_values(self, positive_seeds, negative_seeds):
    self.values = []
    POS_tags = list(set(nltk.pos_tag(WordPunctTokenizer().tokenize(self.data))))
    words = []
    for (w, s) in POS_tags:
        w = w.lower()
        POS = self.get_wordnet_pos(s)
        if POS == '' or re.match(r"^[\w]+$", w) is None:
            words.append('0')
        else:
            # build a first-sense key such as 'good.a.01'
            w += "." + POS
            w += ".01"
            words.append(w)
    negative_set = []
    for nw in negative_seeds:
        for s in wordnet.synsets(nw):
            negative_set.append(s)
    positive_set = []
    for pw in positive_seeds:
        for s in wordnet.synsets(pw):
            positive_set.append(s)
    self.eval_words(words, positive_set, negative_set)
    return self.values
Example 12: getSynonym
# assumes: from nltk.corpus import wordnet as wn
def getSynonym(word, tag):
    # map Penn Treebank tags onto WordNet POS constants
    pos_list = {"JJ": "ADJ", "JJR": "ADJ", "JJS": "ADJ",
                "NN": "NOUN", "NNS": "NOUN", "NPS": "NOUN", "NP": "NOUN",
                "RBR": "ADV", "RBS": "ADV", "RB": "ADV",
                "VB": "VERB", "VBD": "VERB", "VBG": "VERB",
                "VBN": "VERB", "VBP": "VERB", "VBZ": "VERB"}
    wn_pos = {"ADJ": wn.ADJ, "NOUN": wn.NOUN, "ADV": wn.ADV, "VERB": wn.VERB}
    li = {}
    if tag in pos_list:
        # collect the lemma names of every synset with the mapped POS
        for synset in wn.synsets(word, pos=wn_pos[pos_list[tag]]):
            for s in synset.lemma_names():
                li[s] = s
    return list(li.keys())
Example 13: xhyper
# assumes: from nltk.corpus import wordnet
#          UI.request_x, first_depth, valueAt and pos_redef are helpers
#          defined elsewhere in the same project
def xhyper(words) -> [str]:
    '''returns the highest-order x hypernyms of each word'''
    x = UI.request_x()
    print("\nNote: this program will use the first parallel synset if there are any")
    print("\nGathering data...")
    result = [x]
    hyp = lambda w: w.hypernyms()
    # This would pick up the deepest branch's depth -> valueAt returns None -> returns None
    # depth = lambda L: isinstance(L, list) and max(map(depth, L)) + 1
    for i in range(len(words)):
        synsets = wordnet.synsets(words[i])
        if len(synsets) > 0:
            for s in range(len(synsets)):
                hyper = synsets[s].tree(hyp)
                # adjectives, satellite adjectives and adverbs have no hypernym tree
                if hyper[0].pos() in ['a', 's', 'r']:
                    result.append([words[i], 'None', 'None', [None]])
                    continue
                d = first_depth(hyper) - 1
                xhyper = []
                for j in range(x):
                    xhyper.append(valueAt(d - j, hyper))
                    if xhyper[-1] is None:
                        break
                result.append([words[i], pos_redef(hyper[0].pos()), hyper[0], xhyper])
        else:
            result.append([words[i], 'None', 'None', [None]])
    return result
Example 14: userEnteredWordSensor
# assumes: from nltk.corpus import wordnet as wn
#          session, cursor, exactly_right, nearly_right and perform() come
#          from the surrounding application; the original is pseudocode and
#          is only reconstructed here to be syntactically valid
def userEnteredWordSensor(user_input):
    # Which stage are we currently in (AS, IM or WI), and what did the user enter?
    if exactly_right:
        # save the complete action plan and log the path
        cursor.executeQuery("insert into path values ('', {}, {})".format(
            session['uid'], session['wordid']))
        pathid = cursor.executeQuery(
            "select pathid from path where wordid = {}".format(session['wordid']))
        cursor.executeQuery("insert into waypoint values ('', {}, {}, {})".format(
            pathid, session['type'], session['waypoint_info']))
        # proceed to the next word
        perform()
    elif nearly_right:
        # "nearly right" means the input matched one of the word's tags...
        wid = cursor.executeQuery(
            "select wordid from words where word like '{}'".format(session['word']))
        tags = cursor.executeQuery(
            "select tags from words where wordid = {}".format(wid))
        for tag in tags:
            if tag == user_input:
                # perform action sequence for NEXT
                break
        # ...or a synonym of the word itself...
        for s in wn.synsets(session['word']):
            if s == user_input:
                # perform action sequence for NEXT
                break
        else:
            # ...or a synonym of one of its tags
            for tag in tags:
                for s in wn.synsets(tag):
                    if s == user_input:
                        # perform action sequence for NEXT
                        break
Example 15: wndist
# assumes: import nltk
#          from math import ceil
#          from nltk.corpus import wordnet as wn
#          __get_pos__ = a helper returning the POS tag of a mention
def wndist(fs):
    """
    Distance between NP1 and NP2 in WordNet (using the first sense only)
    """
    wndist = -100000  # sentinel for "not computable"
    i_pos = __get_pos__(fs.article, fs.sentence, fs.offset_begin, fs.offset_end)
    j_pos = __get_pos__(fs.article, fs.sentence_ref, fs.offset_begin_ref, fs.offset_end_ref)
    # consider only common nouns (NN/NNS, not NNP/NNPS)
    if i_pos.startswith('NN') and j_pos.startswith('NN') and not i_pos.endswith('P') and not j_pos.endswith('P'):
        lemmatizer = nltk.WordNetLemmatizer()
        i = lemmatizer.lemmatize(fs.i_cleaned, pos='n')
        j = lemmatizer.lemmatize(fs.j_cleaned, pos='n')
        synsets_i = wn.synsets(i)
        synsets_j = wn.synsets(j)
        if len(synsets_i) > 0 and len(synsets_j) > 0:
            wn_sense1_i = synsets_i[0]
            wn_sense1_j = synsets_j[0]
            wn_pos_i = str(wn_sense1_i).split('.')[1]
            wn_pos_j = str(wn_sense1_j).split('.')[1]
            # lch_similarity is only defined for synsets of the same POS
            if wn_pos_i == wn_pos_j:
                wndist = wn_sense1_i.lch_similarity(wn_sense1_j)
                wndist = ceil(wndist * 100) / 100.0
    return "wndist={}".format(wndist)