This article collects typical usage examples of the nltk.corpus.wordnet.synset function in Python. If you have been wondering what exactly the synset function does, how to call it, and what real uses look like, the curated code examples below may help.
The following 15 code examples of the synset function are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
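Before the examples, here is a minimal sketch of the pattern they all share (not taken from any of them; the words are chosen purely for illustration): wn.synset() looks up one specific sense by its 'lemma.pos.nn' name and raises WordNetError if that sense does not exist, which is why most examples guard the lookup with wn.synsets() or a try/except; the returned Synset objects are then compared with measures such as path_similarity() or wup_similarity().

from nltk.corpus import wordnet as wn

# Look up specific senses by their 'lemma.pos.nn' names.
dog = wn.synset('dog.n.01')   # first noun sense of "dog"
cat = wn.synset('cat.n.01')   # first noun sense of "cat"

print(dog.definition())       # gloss of this sense
print(dog.lemma_names())      # synonyms grouped in this synset

# Both measures return a float, or None when the senses cannot be connected.
print(dog.path_similarity(cat))
print(dog.wup_similarity(cat))

# Guard against unknown senses the same way the examples below do:
if wn.synsets('dog', pos=wn.NOUN):
    first_sense = wn.synsets('dog', pos=wn.NOUN)[0]
    print(first_sense.name())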
Example 1: process_verb
def process_verb(verb):
    verb = verb[:-1]  # Remove newline char
    with open('youtube_setof_verbs.txt') as f:
        verb_dict = f.read()
    verb_dict = verb_dict.split('\n')
    max_score = 0
    finl_verb = (verb, '<>')
    verb_list = re.findall('[A-Z][^A-Z]*', verb)
    for prob_verb in verb_list:
        if prob_verb[len(prob_verb)-3:] == 'ing':
            prob_verb = prob_verb[:-3]  # Remove 'ing' from verb
        if prob_verb.lower() == 'cutt':
            prob_verb = 'cut'
        if wn.synsets(prob_verb):
            try:
                v1 = wn.synset(prob_verb + '.v.01')
                for yout_verb in verb_dict:
                    if yout_verb != '':
                        # if wn.synsets(yout_verb):
                        v2 = wn.synset(yout_verb + '.v.01')
                        score = v1.wup_similarity(v2)
                        if score > max_score:
                            finl_verb = (prob_verb, yout_verb)
                            max_score = score
            except:
                finl_verb = (prob_verb, '<>')
                pass
    # print finl_verb, max_score
    return finl_verb[1]
Example 2: process_subj
def process_subj(subj, flag):
    if flag == 1:
        with open('youtube_setof_subjects.txt') as f:
            subj_dict = f.read()
        subj_dict = subj_dict.split('\n')
    elif flag == 2:
        with open('youtube_setof_objects.txt') as f:
            obj_dict = f.read()
        subj_dict = obj_dict.split('\n')
    max_score = 0
    finl_subj = (subj, '<>')
    subj_list = subj.split(',')
    if len(subj_list) == 1:
        return subj
    for prob_subj in subj_list:
        prob_subj = prob_subj.strip()
        if wn.synsets(prob_subj):
            try:
                v1 = wn.synset(prob_subj + '.n.01')
                for yout_subj in subj_dict:
                    if yout_subj != '':
                        v2 = wn.synset(yout_subj + '.n.01')
                        score = v1.wup_similarity(v2)
                        if score > max_score:
                            finl_subj = (prob_subj, yout_subj)
                            max_score = score
            except:
                finl_subj = (prob_subj, '<>')
                pass
    # print finl_verb, max_score
    return (finl_subj[1])
Example 3: preprocess_docs
def preprocess_docs():
    stopwords = nltk.corpus.stopwords.words('english')
    corpus = list(filtered_corpus())
    counter = 0
    for train, topic, title, text in corpus:
        if counter % 10 == 0:
            print "%.2f %%\r" % (counter * 100.0 / len(corpus),),
            sys.stdout.flush()
        counter += 1
        text = [i for i in nltk.word_tokenize(title) if i.lower() not in stopwords]
        buf = []
        for word in text:
            synsets = wn.synsets(word)
            grain = []
            wheat = []
            for s in synsets:
                grain.append(s.path_similarity(wn.synset('wheat.n.02')))
                wheat.append(s.path_similarity(wn.synset('grain.n.08')))
            grain = [i for i in grain if i is not None]
            wheat = [i for i in wheat if i is not None]
            if len(grain) == 0:
                grain = 0
            else:
                grain = sum(grain) * 1.0 / len(grain)
            if len(wheat) == 0:
                wheat = 0
            else:
                wheat = sum(wheat) * 1.0 / len(wheat)
            buf.append((word, grain, wheat))
        yield train, topic, buf
    print ""
Example 4: get_score
def get_score(tags, groups):
    sscore = 0
    scount = 0
    illegal_word = 0
    if (tags != None):
        for g in groups:
            for x in k.tags:
                try:
                    # print str(x.text),
                    # check substring, else calculate word similarity score
                    if g in str(x.text).lower():
                        sscore += 2.0
                        scount += 1
                    else:
                        tag = wn.synset(str(x.text).lower() + '.n.01')
                        group = wn.synset(g + '.n.01')
                        sem = wn.path_similarity(group, tag)
                        if sem >= 0.3:
                            sscore += sem
                            scount += 1
                except:
                    illegal_word += 1
    if scount != 0:
        return sscore/scount
    else:
        return 0
Example 5: getSenseSimilarity
def getSenseSimilarity(worda, wordb):
    """
    find similarity between word senses of two words
    """
    wordasynsets = wn.synsets(worda)
    wordbsynsets = wn.synsets(wordb)
    synsetnamea = [wn.synset(str(syns.name)) for syns in wordasynsets]
    synsetnameb = [wn.synset(str(syns.name)) for syns in wordbsynsets]
    for sseta, ssetb in [(sseta, ssetb) for sseta in synsetnamea for ssetb in synsetnameb]:
        pathsim = sseta.path_similarity(ssetb)
        wupsim = sseta.wup_similarity(ssetb)
        if pathsim != None:
            print "Path Sim Score: ", pathsim, " WUP Sim Score: ", wupsim, "\t", sseta.definition, "\t", ssetb.definition
Example 6: probability
def probability(tokens, category, dictionary, total):
    if category == "sense":
        total_score = 0
        dic = dictionary
        if len(tokens) == 0:
            return 0
        for token in tokens:
            for dict_sense in dic:
                score = wn.synset(token).path_similarity(wn.synset(dict_sense))
                if score is not None:
                    total_score += score * dic[dict_sense]
        return (total_score/len(tokens))
    else:
        p = 0
        dic = dictionary
        total_instances = total
        for token in tokens:
            if token in dic:
                token_prob = dic[token]
            else:
                token_prob = 0
            # smooth one out
            curr = token_prob/float(total_instances)
            p += curr
        return p
Example 7: get_similar_words
def get_similar_words(word):
    lemmas_noun = hypernyms_noun = lemmas_verb = hypernyms_verb = []
    try:
        lemmas_noun = [str(lemma.name()) for lemma in wn.synset(word + '.n.01').lemmas()]
    except WordNetError:
        pass
    try:
        hypernyms_noun = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.n.01').hypernyms()]
    except WordNetError:
        pass
    if len(lemmas_noun) == 0 and len(hypernyms_noun) == 0:
        """
        Only try verbs if there are no similar nouns
        """
        try:
            lemmas_verb = [str(lemma.name()) for lemma in wn.synset(word + '.v.01').lemmas()]
        except WordNetError:
            pass
        try:
            hypernyms_verb = [str(lemma.name()).split('.')[0] for lemma in wn.synset(word + '.v.01').hypernyms()]
        except WordNetError:
            pass
    similar_words = lemmas_noun + hypernyms_noun + lemmas_verb + hypernyms_verb
    # filter words which are not purely alphabetic (there will be words with underscores)
    # this is because processing words like "domestic_animal" would require
    # a 2-gram search, which is not done here
    pattern = re.compile('^[a-zA-Z]+$')
    return filter(lambda x: pattern.match(x) and x != word, similar_words)
Example 8: expand_queries
def expand_queries(file):
    '''
    For each term in a query, takes the first synset of the word from wordnet and adds all synonyms of that synset
    '''
    file = open(file)
    for sentence in file:
        sentence = sentence.strip()
        if sentence.find('<text>') != -1:
            query = sentence[sentence.find('>')+1: sentence.rfind('<')]
            additions = ''
            updated_q = nltk.pos_tag(nltk.wordpunct_tokenize(query.lower()))
            full_q = query
            for word, pos in updated_q:
                if word not in stopwords.words('english'):
                    looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                    synsets = wn.synsets(word)
                    if looking_for in str(synsets):
                        new_words = (wn.synset(looking_for).lemma_names)  # was .definition
                        for new_word in new_words:
                            if new_word.lower() != word.lower():
                                full_q = full_q + ' ' + str(new_word)
                    else:
                        if wn.morphy(word) != None:
                            word = wn.morphy(word)
                            looking_for = str(word) + '.' + str(get_wordnet_pos(pos)) + '.01'
                            print str(looking_for) + ' THIS IS WORD'
                            synsets = wn.synsets(word)
                            if looking_for in str(synsets):
                                new_words = (wn.synset(looking_for).lemma_names)  # was .definition
                                for new_word in new_words:
                                    if new_word.lower() != word.lower():
                                        full_q = full_q + ' ' + str(new_word)
            print query + ' ' + full_q
Example 9: get_similarity
def get_similarity(self, word1, word2):
    '''Compute word similarity based on the WordNet lexical database'''
    '''
    print 'before stemmed:', word1
    print 'after stemmed:', wn.morphy(word1.lower())
    print 'before stemmed:', word2
    print 'after stemmed:', wn.morphy(word2.lower())
    '''
    # stemmed word
    if wn.morphy(word1.lower()) != None:
        word1 = wn.morphy(word1.lower())
    if wn.morphy(word2.lower()) != None:
        word2 = wn.morphy(word2.lower())
    word1_synsets = wn.synsets(word1)
    # print word1_synsets
    word2_synsets = wn.synsets(word2)
    # print word2_synsets
    sim = 0
    for syn1 in word1_synsets:
        w1 = wn.synset(syn1.name())
        for syn2 in word2_synsets:
            w2 = wn.synset(syn2.name())
            tmp = w1.path_similarity(w2)
            # print tmp, syn1.name(), syn2.name()
            if tmp > sim:
                sim = tmp
    return sim
Example 10: print_other_lexical_rel
def print_other_lexical_rel():
    good1 = wn.synset('good.a.01')
    wn.lemmas('good')
    print("Antonyms of 'good': " + str(good1.lemmas()[0].antonyms()))
    print("")
    print("Entailment of 'walk': " + str(wn.synset('walk.v.01').entailments()))
    print("")
Example 11: overlapCount
def overlapCount(self, sentence):
    # set count to one so we can guess in case there are no sentences with overlap
    count = 1
    sWiki = TextBlob(self.arrayToString(sentence))
    sVerbs = self.getVerbs(sWiki)
    # compare verbs for similarity based on wordnet's similarity score;
    # if they're exactly the same, they'll score 1
    for sverb in sVerbs:
        synv = wn.synset(sverb + '.v.01')
        for qverb in self.questionVerbs:
            synq = wn.synset(qverb + '.v.01')
            count += synv.path_similarity(synq)
    # remove stop words from sentence AFTER we've gotten POS tags
    s = self.removeStopWords(sentence)
    sLower = self.removeStopWords(sentence.lower())
    for word in self.qList:
        if word in s:
            count += 1
        else:
            if word.lower() in sLower:
                count += 0.1
    return count
Example 12: compare
def compare(self, word1, word2):
    tmp1 = wn.synsets(word1)[0].name
    tmp2 = wn.synsets(word2)[0].name
    w1 = wn.synset(tmp1)
    w2 = wn.synset(tmp2)
    val = w1.wup_similarity(w2)
    return val
Example 13: is_ingredient
def is_ingredient(word):
    """
    Return True if the word is an ingredient, False otherwise.

    >>> is_ingredient('milk')
    True
    >>> is_ingredient('blackberries')
    True
    >>> is_ingredient('Canada')
    False
    >>> is_ingredient('breakfast')
    False
    >>> is_ingredient('dish')
    False
    """
    reject_synsets = ['meal.n.01', 'meal.n.02', 'dish.n.02', 'vitamin.n.01']
    reject_synsets = set(wordnet.synset(w) for w in reject_synsets)
    accept_synsets = ['food.n.01', 'food.n.02']
    accept_synsets = set(wordnet.synset(w) for w in accept_synsets)
    for word_synset in wordnet.synsets(word, wordnet.NOUN):
        all_synsets = set(word_synset.closure(lambda s: s.hypernyms()))
        all_synsets.add(word_synset)
        for synset in reject_synsets:
            if synset in all_synsets:
                return False
        for synset in accept_synsets:
            if synset in all_synsets:
                return True
    return word in wordlists.ingredients
Example 14: ontoList
def ontoList(self, synset):
    # things to pick from
    if self.pos == 'v':
        ln = wn.synset(synset).lexname.split('.')[1]
        hyper = self.lemmatize(self.getHypernyms(synset))
        definition = self.getDefinition(synset)
        lemmas = self.lemmatize(self.getLemmas(synset))
        examples = self.getExamples(synset)
        strings = [string.replace("_", " ") for string in self.getFrameStrings(synset)]
        hypo = self.lemmatize(self.getHyponyms(synset))
        ontologyList = [strings, ln, lemmas, examples, hypo, definition, hyper]
    else:
        ln = wn.synset(synset).lexname.split('.')[1]
        hyper = self.lemmatize(self.getHypernyms(synset))
        definition = self.getDefinition(synset)
        lemmas = self.lemmatize(self.getLemmas(synset))
        examples = self.getExamples(synset)
        hypo = self.lemmatize(self.getHyponyms(synset))
        ontologyList = [ln, lemmas, examples, hypo, definition, hyper]
    returnList = list()
    for o in ontologyList:
        if o:
            returnList.append(o)
    return returnList
Example 15: calculate_and_write_edge_weigthings_for_synsets
def calculate_and_write_edge_weigthings_for_synsets(synset_filenames_dict, file_name):
    max_co_occurrence = calculate_max_co_occurrence(synset_filenames_dict)
    edge_weigthings_for_synsets = dict()
    how_many_added = 0
    how_many_done = 0
    how_many_to_do = len(synset_filenames_dict.keys()) * (len(synset_filenames_dict.keys())-1)
    write_edge_weightings_to_file(dict(), file_name)
    for synset1, filenames1 in synset_filenames_dict.iteritems():
        for synset2, filenames2 in synset_filenames_dict.iteritems():
            if synset1 < synset2:
                how_many_done += 1
                # if (synset1.name, synset2.name) not in similarity_histogram:
                similarity = wn.synset(synset1).lch_similarity(wn.synset(synset2))
                co_occurence = len(set(synset_filenames_dict[synset1]).intersection(set(synset_filenames_dict[synset2])))
                normalized_co_occurrence = co_occurence/max_co_occurrence
                if similarity < 2.0:
                    similarity = 0
                if normalized_co_occurrence < 0.4:
                    normalized_co_occurrence = 0
                edge_weighting = similarity + 4*normalized_co_occurrence
                if edge_weighting != 0:
                    edge_weigthings_for_synsets[(synset1, synset2)] = edge_weighting
                    how_many_added += 1
                if how_many_added > 1000:
                    print_status("Done with " + str(how_many_done) + " of " + str(how_many_to_do) + "\n")
                    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)
                    edge_weigthings_for_synsets = dict()
                    how_many_added = 0
    write_edge_weightings_to_file(edge_weigthings_for_synsets, file_name, append_to_file=True)