This article collects typical usage examples of the Python function nltk.corpus.wordnet.all_synsets. If you have been wondering how to use all_synsets in Python, what it is good for, or what real code that calls it looks like, the hand-picked examples below should help.
A total of 15 code examples of the all_synsets function are shown, sorted by popularity by default.
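Before turning to the project examples, here is a minimal usage sketch (assuming NLTK 3.x with the WordNet corpus downloaded; it is not taken from any of the projects below). all_synsets returns a lazy iterator over every Synset in WordNet, optionally restricted to a single part of speech ('n', 'v', 'a', 'r'):

# Minimal sketch: iterate over all WordNet synsets.
# Assumes the corpus is available, e.g. after nltk.download('wordnet').
from nltk.corpus import wordnet as wn

# all_synsets() yields synsets lazily; pass a POS tag to restrict it.
noun_synsets = wn.all_synsets('n')
first = next(noun_synsets)
print(first.name(), first.definition())

# Count synsets per part of speech (exact numbers depend on the WordNet version).
for pos in ('n', 'v', 'a', 'r'):
    print(pos, sum(1 for _ in wn.all_synsets(pos)))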
Example 1: load
def load(self, pos):
    wn = self.wn
    if pos == 'n':
        roots = wn.synsets('entity')
    else:
        roots = [s for s in wn.all_synsets(pos) if len(s.hypernyms()) == 0]

    self.root = WordNetTreeNode('root')
    for synset in roots:
        self.__append_synset(synset, self.root)

    # Unfortunately, the block above is not guaranteed to build the
    # entire WordNet tree, because it starts at the root and adds the
    # descendants retrieved from synset.hyponyms(). For some odd reason
    # that method does not always return all hyponyms. For example,
    # portugal.n.01 is not retrieved as a hyponym of european_country.n.01,
    # but if we call wn.synsets('portugal')[0].hypernym_paths(),
    # european_country.n.01 appears as its ancestor.

    # Check for synsets that were not found and graft in their hypernym paths.
    index = self.hashtable()
    for synset in wn.all_synsets(pos):
        if synset.name() not in index:
            for path in synset.hypernym_paths():
                keys = [s.name() for s in path]
                self.__extend(keys,
                              is_internal=len(path[-1].hyponyms()) > 0)
Example 2: prepare
def prepare(self):
    for verb in wn.all_synsets('v'):
        for lemma in verb.lemmas():
            if 1 in lemma.frame_ids():
                # if any lemma of this verb synset supports sentence frame 1,
                # add every lemma of the synset to the verb list
                for lemma in verb.lemmas():
                    verbs.append(str(lemma.name()).replace('_', ' '))
    for noun in wn.all_synsets('n'):
        for lemma in noun.lemmas():
            nouns.append(self.plural(str(lemma.name()).replace('_', ' ')))
    for adj in wn.all_synsets('a'):
        for lemma in adj.lemmas():
            adjectives.append(str(lemma.name()).replace('_', ' '))
    for adv in wn.all_synsets('r'):
        for lemma in adv.lemmas():
            adverbs.append(str(lemma.name()).replace('_', ' '))
Example 3: list_nouns
def list_nouns():
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    if WIKI_LANGUAGE == 'en':
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
        ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    elif WIKI_LANGUAGE == 'es':
        ## Make list of nouns from cess_esp
        tagged = nltk.corpus.cess_esp.tagged_words()
        sust = []
        for elem in tagged:
            if elem[1][0] == 'n':
                sust.append(elem[0])
        NOUNS = set(sust)
    # TODO german language support
    # elif WIKI_LANGUAGE == 'de':
    else:
        print "[!] Language not recognised, using English."
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"
Example 4: populateBars
def populateBars():
    connection = mdb.connect('localhost', 'user', 'pass', 'barlytics')
    current = connection.cursor()
    nounsList = []
    adjectiveList = []
    cityList = ['San Francisco', 'Chicago', 'New York', 'Austin', 'Seattle']
    print "here"
    count = 0
    for synset in list(wn.all_synsets('n')):
        nounsList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    count = 0
    print "here"
    for synset in list(wn.all_synsets('a')):
        adjectiveList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    print "here"
    finalList = []
    for i in range(10000):
        string = "The " + adjectiveList[randint(0, len(adjectiveList) - 1)].capitalize()
        string = string + " " + nounsList[randint(0, len(nounsList) - 1)].capitalize()
        finalList.append(string)
        name = string
        license = str(randint(1000000, 9000000))
        city = str(address.city())
        phone = str(phone_number.phone_number_format(0))
        addr = str(randint(1, 255)) + " " + address.street_name()
        query = 'insert into bars values("' + name + '", "' + license + '", "' + city + '", "' + phone + '", "' + addr + '"); '
        print query
        try:
            current.execute(query)
        except mdb.IntegrityError:
            print "integrity error:"
    print 'commit'
    connection.commit()
Example 5: _run_extract
def _run_extract(self):
    # extract all 2-word AN and NN compounds from WN and write them to file
    print "Extracting noun compounds from WN"
    discards = []
    allsynsets = list(wn.all_synsets(self.parameters['pos']))
    if not self.parameters['testing']:
        self.n = len(allsynsets)
    for synset in list(wn.all_synsets(self.parameters['pos']))[:self.n]:
        for lemma in synset.lemmas:  # walk over all lemmas for all synsets
            words = lemma.name.split('_')
            if len(words) == 2:  # check 2 words
                poslist = []
                for word in words:
                    # generate a PosList for this pair of words
                    poslist.append(PairGenerator.getpos(word))
                headpos = poslist.pop()
                if 'N' in headpos:  # is 'N' a possible part of speech for the head word (last word in the list)?
                    phrase = words.pop() + '/N'
                    modpos = poslist.pop()
                    mod = words.pop()
                    if 'N' in modpos:  # is 'N' a possible part of speech for the modifier?
                        NNphrase = phrase + ":nn-DEP:" + mod + '/N'
                        self.NNs.append(NNphrase)
                    if 'J' in modpos:  # is 'J' a possible part of speech for the modifier?
                        ANphrase = phrase + ":amod-DEP:" + mod + '/J'
                        self.ANs.append(ANphrase)
                    if len(modpos) == 0:  # only considering J and N for the modifier
                        discards.append(lemma.name)
                else:  # only considering N for the head
                    discards.append(lemma.name)
    print len(self.NNs), self.NNs
    print len(self.ANs), self.ANs
    print len(discards), discards
    # write the lists to file
    with open(self.ANpath, 'w') as outstream:
        for AN in self.ANs:
            outstream.write(AN + '\n')
    with open(self.NNpath, 'w') as outstream:
        for NN in self.NNs:
            outstream.write(NN + '\n')
    return
Example 6: exercise3
def exercise3():
    print
    print "Exercise - 3"
    ss = [w for w in wn.all_synsets('v')]
    result = sum([len(ss[i].hypernyms()) for i in range(len(ss))])
    print "Total number of hypernyms of 'v' is: %d" % result
    print "Average number of hypernyms is: %f" % (result / float(len(ss)))
Example 7: getAllGlossLinks
def getAllGlossLinks(useTagger=False, useverbs=False, reflexive=False, n=10000):
    links = {}
    print "Gathering synsets"
    synsets = [ss for ss in wordnet.all_synsets()]
    n = 0
    for ss in synsets:
        print "%.3f" % (float(n) / float(len(synsets)))
        n += 1
        ssname = ss.name
        defn = wordboundary.split(ss.definition.strip())
        if useTagger:
            defn = [(form, wdnettags[tag[0]]) for form, tag in useTagger.tag(defn) if not form == "" and tag[0] in wdnettags]
        if not ssname in links:
            links[ssname] = {}
        # NB: n doubles as the progress counter, so defn[:n] grows as the iteration proceeds
        for w in defn[:n]:
            if isinstance(w, str):
                # untagged tokens are plain strings ...
                wsynsets = wordnet.synsets(w)
            else:
                # ... tagged tokens are (form, tag) pairs
                wsynsets = wordnet.synsets(w[0], w[1])
            for s in wsynsets:
                sname = s.name
                links[ssname][sname] = True
                if reflexive:
                    if not sname in links:
                        links[sname] = {}
                    links[sname][ssname] = True
        if not ssname in links:
            print ssname, defn
    for l in links:
        ll = links[l]
        for d in ll:
            links[l][d] = 1.0 / float(len(ll))
    return links
Example 8: __init__
def __init__(self):
    t0 = time()
    print 'initializing random word generator'
    self.s_articles = ['A', 'The']
    self.o_articles = ['a', 'the']
    self.prepositions = ['of', 'in', 'to', 'for', 'with', 'on', 'at', 'from', 'by',
                         'about', 'as', 'into', 'like', 'through', 'after', 'over', 'out', 'around']
    self.nouns = list(wn.all_synsets(wn.NOUN))
    self.verbs = list(wn.all_synsets(wn.VERB))
    self.adjectives = list(wn.all_synsets(wn.ADJ))
    self.adverbs = list(wn.all_synsets(wn.ADV))
    t1 = time()
    runTime = t1 - t0
    print 'word list initialized in ' + str(runTime) + ' seconds'
Example 9: main
def main(argv):
    huang_vocab = LoadHuang()
    manaal_vocab = LoadManaal()
    brown_vocab = LoadBrown()
    all_lemmas = {x.lower() for x in wn.all_lemma_names(pos=wn.ADJ)}
    all_alpha_lemmas = {x for x in all_lemmas if x.isalpha()}
    all_synsets = set(wn.all_synsets(pos=wn.ADJ))
    all_alpha_synsets = {x for x in all_synsets if IsAlphaSS(x)}
    all_lemmas_with_single_synset = {x for x in all_lemmas if IsSingleSynset(x)}
    all_lemmas_ambig_synset = {x for x in all_lemmas if not IsSingleSynset(x)}
    all_lemmas_with_single_synset_alpha = {x for x in all_lemmas_with_single_synset if x.isalpha()}
    all_lemmas_ambig_synset_alpha = {x for x in all_lemmas_ambig_synset if x.isalpha()}
    all_alpha_lemmas_has_noun = {x for x in all_alpha_lemmas if LemmaHasNoun(x)}
    all_alpha_lemmas_has_noun_single_lexname = {x for x in all_alpha_lemmas_has_noun if IsNounSingleLexName(x)}
    print "all_lemmas:", len(all_lemmas)
    print "all_alpha_lemmas:", len(all_alpha_lemmas)
    print "all_synsets:", len(all_synsets)
    print "all_alpha_synsets:", len(all_alpha_synsets)
    print "all_lemmas_with_single_synset:", len(all_lemmas_with_single_synset)
    print "all_lemmas_ambig_synset:", len(all_lemmas_ambig_synset)
    print "all_lemmas_with_single_synset_alpha", len(all_lemmas_with_single_synset_alpha)
    print "all_lemmas_ambig_synset_alpha", len(all_lemmas_ambig_synset_alpha)
    print "all_alpha_lemmas_has_noun", len(all_alpha_lemmas_has_noun)
    print "all_alpha_lemmas_has_noun_single_lexname", len(all_alpha_lemmas_has_noun_single_lexname)
    print "huang.intersect(all_alpha_lemmas)", len(huang_vocab.intersection(all_alpha_lemmas))
    print "manaal.intersect(all_alpha_lemmas)", len(manaal_vocab.intersection(all_alpha_lemmas))
    print "brown.intersect(all_alpha_lemmas)", len(brown_vocab.intersection(all_alpha_lemmas))
    print "huang*manaal*brown*all_alpha_lemmas", len(huang_vocab.intersection(all_alpha_lemmas, manaal_vocab, brown_vocab))
    print "huang.intersect(all_lemmas_with_single_synset_alpha)", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "manaal.intersect(all_lemmas_with_single_synset_alpha)", len(manaal_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "brown.intersect(all_lemmas_with_single_synset_alpha)", len(brown_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "huang*manaal*brown*all_lemmas_with_single_synset_alpha", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha, manaal_vocab, brown_vocab))
Example 10: convert_all_to_basic
def convert_all_to_basic(reviews):
    print("Process Started")
    print("Getting all nouns....")
    words = [s for s in wn.all_synsets(wn.NOUN)
             if (s.name().find('-') == -1) and (s.name().find('_') == -1) and len(s.name().split('.')[0]) < 12]
    print("Processing basic logic probability...")
    words2 = []
    filter_basic_logic(words, words2)
    print("Removing redundancy...")
    a = list(set(words2))
    a.sort()
    remove_unwanted(a)
    newReviews = []
    for review in reviews:
        tempReview = ""
        tokens = word_tokenize(review)
        for token in tokens:
            tempword = check_basic(token, a)
            if tempword:
                tempReview = tempReview + " " + tempword
            else:
                tempReview = tempReview + " " + token
        newReviews.append(tempReview)
    return newReviews
Example 11: wn_pos_dist
def wn_pos_dist():
    """Count the Synsets in each WordNet POS category."""
    # One-dimensional count dict with 0 as the default value:
    cats = defaultdict(int)
    # The counting loop:
    for synset in wn.all_synsets():
        cats[synset.pos] += 1
    # Return the per-POS counts so the caller can inspect or print them.
    return cats
Example 12: load_corpora
def load_corpora(self):
    print "Loading corpora..."
    pth = os.path.realpath(os.path.dirname(__file__))
    nltk.data.path.append(os.path.join(pth, "nltk_data"))
    from nltk.corpus import wordnet as wn
    self._adjectives = list(wn.all_synsets('a'))
    self._nouns = list(wn.all_synsets('n'))
    with open(os.path.join(pth, "firstnames.txt")) as fh:
        self._firstnames = fh.readlines()
    with open(os.path.join(pth, "surnames.txt")) as fh:
        self._surnames = fh.readlines()
Example 13: populate_cache
def populate_cache():
    adjectives, nouns = (set(), set())
    for wordset, kind in [
        (adjectives, wordnet.ADJ),
        (nouns, wordnet.NOUN),
    ]:
        for synset in wordnet.all_synsets(kind):
            for lemma in filter(
                lambda l: all((
                    not re.search(r'\d', l.name()),
                    l.name() not in BLACKLIST,
                    not l.name().endswith('_to'),
                    l.count() > 0,
                )), synset.lemmas()
            ):
                wordset.add(lemma.name().replace('_', ' '))
    os.mkdir(CACHE_PATH)
    for words, filename in [
        (adjectives, 'adjectives'),
        (nouns, 'nouns'),
    ]:
        with open(os.path.join(CACHE_PATH, filename), 'w') as f:
            f.writelines((u'{}\n'.format(w) for w in words))
Example 14: list_nouns
def list_nouns():
    ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    ## Make list of nouns in wordnet
    NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"
Example 15: ex26_branchingfactor
def ex26_branchingfactor():
    from nltk.corpus import wordnet as wn
    num_synsets = 0
    num_hyponyms = 0
    for noun_synset in wn.all_synsets("n"):
        (num_hyponyms, num_synsets) = \
            branchingfactor_r(noun_synset, num_synsets, num_hyponyms)
    # use float division so the average branching factor is not truncated
    print "branching factor=", (float(num_hyponyms) / num_synsets)