This article collects typical usage examples of the Python function nltk.corpus.wordnet.all_synsets. If you have been wondering how to use all_synsets in Python, what it is good for, or what real code that calls it looks like, the hand-picked examples below should help.
A total of 15 code examples of the all_synsets function are shown, sorted by popularity by default.
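Before turning to the project examples, here is a minimal usage sketch (assuming NLTK 3.x with the WordNet corpus downloaded; it is not taken from any of the projects below). all_synsets returns a lazy iterator over every Synset in WordNet, optionally restricted to a single part of speech ('n', 'v', 'a', 'r'):

# Minimal sketch: iterate over all WordNet synsets.
# Assumes the corpus is available, e.g. after nltk.download('wordnet').
from nltk.corpus import wordnet as wn

# all_synsets() yields synsets lazily; pass a POS tag to restrict it.
noun_synsets = wn.all_synsets('n')
first = next(noun_synsets)
print(first.name(), first.definition())

# Count synsets per part of speech (exact numbers depend on the WordNet version).
for pos in ('n', 'v', 'a', 'r'):
    print(pos, sum(1 for _ in wn.all_synsets(pos)))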
Example 1: load
def load(self, pos):
    wn = self.wn
    if pos == 'n':
        roots = wn.synsets('entity')
    else:
        roots = [s for s in wn.all_synsets(pos) if len(s.hypernyms()) == 0]

    self.root = WordNetTreeNode('root')
    for synset in roots:
        self.__append_synset(synset, self.root)

    # Unfortunately, the block above is not guaranteed to build the
    # entire WordNet tree, because it starts at the root and adds the
    # descendants retrieved from synset.hyponyms(). For some odd reason
    # that method does not always return all hyponyms. For example,
    # portugal.n.01 is not retrieved as a hyponym of european_country.n.01,
    # but if we call wn.synsets('portugal')[0].hypernym_paths(),
    # european_country.n.01 appears as its ancestor.

    # Check for synsets that were not found and graft in their hypernym paths.
    index = self.hashtable()
    for synset in wn.all_synsets(pos):
        if synset.name() not in index:
            for path in synset.hypernym_paths():
                keys = [s.name() for s in path]
                self.__extend(keys,
                              is_internal=len(path[-1].hyponyms()) > 0)
Example 2: prepare
def prepare(self):
    for verb in wn.all_synsets('v'):
        for lemma in verb.lemmas():
            if 1 in lemma.frame_ids():
                # if any lemma of this verb synset supports sentence frame 1,
                # add every lemma of the synset to the verb list
                for lemma in verb.lemmas():
                    verbs.append(str(lemma.name()).replace('_', ' '))
    for noun in wn.all_synsets('n'):
        for lemma in noun.lemmas():
            nouns.append(self.plural(str(lemma.name()).replace('_', ' ')))
    for adj in wn.all_synsets('a'):
        for lemma in adj.lemmas():
            adjectives.append(str(lemma.name()).replace('_', ' '))
    for adv in wn.all_synsets('r'):
        for lemma in adv.lemmas():
            adverbs.append(str(lemma.name()).replace('_', ' '))
Example 3: list_nouns
def list_nouns():
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    if WIKI_LANGUAGE == 'en':
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
        ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    elif WIKI_LANGUAGE == 'es':
        ## Make list of nouns from cess_esp
        tagged = nltk.corpus.cess_esp.tagged_words()
        sust = []
        for elem in tagged:
            if elem[1][0] == 'n':
                sust.append(elem[0])
        NOUNS = set(sust)
    # TODO german language support
    # elif WIKI_LANGUAGE == 'de':
    else:
        print "[!] Language not recognised, using English."
        ## Make list of nouns from wordnet
        NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"
Example 4: populateBars
def populateBars():
    connection = mdb.connect('localhost', 'user', 'pass', 'barlytics')
    current = connection.cursor()
    nounsList = []
    adjectiveList = []
    cityList = ['San Francisco', 'Chicago', 'New York', 'Austin', 'Seattle']
    print "here"
    count = 0
    for synset in list(wn.all_synsets('n')):
        nounsList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    count = 0
    print "here"
    for synset in list(wn.all_synsets('a')):
        adjectiveList.append(str(synset.name).split('.')[0])
        count = count + 1
        if count >= 50000:
            break
    print "here"
    finalList = []
    for i in range(10000):
        string = "The " + adjectiveList[randint(0, len(adjectiveList) - 1)].capitalize()
        string = string + " " + nounsList[randint(0, len(nounsList) - 1)].capitalize()
        finalList.append(string)
        name = string
        license = str(randint(1000000, 9000000))
        city = str(address.city())
        phone = str(phone_number.phone_number_format(0))
        addr = str(randint(1, 255)) + " " + address.street_name()
        query = 'insert into bars values("' + name + '", "' + license + '", "' + city + '", "' + phone + '", "' + addr + '"); '
        print query
        try:
            current.execute(query)
        except mdb.IntegrityError:
            print "integrity error:"
    print 'commit'
    connection.commit()
Example 5: _run_extract
def _run_extract(self):
    # extract all 2-word AN and NN compounds from WN and write them to file
    print "Extracting noun compounds from WN"
    discards = []
    allsynsets = list(wn.all_synsets(self.parameters['pos']))
    if not self.parameters['testing']:
        self.n = len(allsynsets)
    for synset in list(wn.all_synsets(self.parameters['pos']))[:self.n]:
        for lemma in synset.lemmas:  # walk over all lemmas for all synsets
            words = lemma.name.split('_')
            if len(words) == 2:  # check 2 words
                poslist = []
                for word in words:
                    # generate a PosList for this pair of words
                    poslist.append(PairGenerator.getpos(word))
                headpos = poslist.pop()
                if 'N' in headpos:  # is 'N' a possible part of speech for the head word (last word in the list)?
                    phrase = words.pop() + '/N'
                    modpos = poslist.pop()
                    mod = words.pop()
                    if 'N' in modpos:  # is 'N' a possible part of speech for the modifier?
                        NNphrase = phrase + ":nn-DEP:" + mod + '/N'
                        self.NNs.append(NNphrase)
                    if 'J' in modpos:  # is 'J' a possible part of speech for the modifier?
                        ANphrase = phrase + ":amod-DEP:" + mod + '/J'
                        self.ANs.append(ANphrase)
                    if len(modpos) == 0:  # only considering J and N for the modifier
                        discards.append(lemma.name)
                else:  # only considering N for the head
                    discards.append(lemma.name)
    print len(self.NNs), self.NNs
    print len(self.ANs), self.ANs
    print len(discards), discards
    # write the lists to file
    with open(self.ANpath, 'w') as outstream:
        for AN in self.ANs:
            outstream.write(AN + '\n')
    with open(self.NNpath, 'w') as outstream:
        for NN in self.NNs:
            outstream.write(NN + '\n')
    return
Example 6: exercise3
def exercise3():
    print
    print "Exercise - 3"
    ss = [w for w in wn.all_synsets('v')]
    result = sum([len(ss[i].hypernyms()) for i in range(len(ss))])
    print "Total number of hypernyms of 'v' is: %d" % result
    print "Average number of hypernyms is: %f" % (result / float(len(ss)))
Example 7: getAllGlossLinks
def getAllGlossLinks(useTagger=False, useverbs=False, reflexive=False, n=10000):
    links = {}
    print "Gathering synsets"
    synsets = [ss for ss in wordnet.all_synsets()]
    n = 0
    for ss in synsets:
        print "%.3f" % (float(n) / float(len(synsets)))
        n += 1
        ssname = ss.name
        defn = wordboundary.split(ss.definition.strip())
        if useTagger:
            defn = [(form, wdnettags[tag[0]]) for form, tag in useTagger.tag(defn) if not form == "" and tag[0] in wdnettags]
        if not ssname in links:
            links[ssname] = {}
        # NB: n doubles as the progress counter, so defn[:n] grows as the iteration proceeds
        for w in defn[:n]:
            if isinstance(w, str):
                # untagged tokens are plain strings ...
                wsynsets = wordnet.synsets(w)
            else:
                # ... tagged tokens are (form, tag) pairs
                wsynsets = wordnet.synsets(w[0], w[1])
            for s in wsynsets:
                sname = s.name
                links[ssname][sname] = True
                if reflexive:
                    if not sname in links:
                        links[sname] = {}
                    links[sname][ssname] = True
        if not ssname in links:
            print ssname, defn
    for l in links:
        ll = links[l]
        for d in ll:
            links[l][d] = 1.0 / float(len(ll))
    return links
Example 8: __init__
def __init__(self):
    t0 = time()
    print 'initializing random word generator'
    self.s_articles = ['A', 'The']
    self.o_articles = ['a', 'the']
    self.prepositions = ['of', 'in', 'to', 'for', 'with', 'on', 'at', 'from', 'by',
                         'about', 'as', 'into', 'like', 'through', 'after', 'over', 'out', 'around']
    self.nouns = list(wn.all_synsets(wn.NOUN))
    self.verbs = list(wn.all_synsets(wn.VERB))
    self.adjectives = list(wn.all_synsets(wn.ADJ))
    self.adverbs = list(wn.all_synsets(wn.ADV))
    t1 = time()
    runTime = t1 - t0
    print 'word list initialized in ' + str(runTime) + ' seconds'
Example 9: main
def main(argv):
    huang_vocab = LoadHuang()
    manaal_vocab = LoadManaal()
    brown_vocab = LoadBrown()
    all_lemmas = {x.lower() for x in wn.all_lemma_names(pos=wn.ADJ)}
    all_alpha_lemmas = {x for x in all_lemmas if x.isalpha()}
    all_synsets = set(wn.all_synsets(pos=wn.ADJ))
    all_alpha_synsets = {x for x in all_synsets if IsAlphaSS(x)}
    all_lemmas_with_single_synset = {x for x in all_lemmas if IsSingleSynset(x)}
    all_lemmas_ambig_synset = {x for x in all_lemmas if not IsSingleSynset(x)}
    all_lemmas_with_single_synset_alpha = {x for x in all_lemmas_with_single_synset if x.isalpha()}
    all_lemmas_ambig_synset_alpha = {x for x in all_lemmas_ambig_synset if x.isalpha()}
    all_alpha_lemmas_has_noun = {x for x in all_alpha_lemmas if LemmaHasNoun(x)}
    all_alpha_lemmas_has_noun_single_lexname = {x for x in all_alpha_lemmas_has_noun if IsNounSingleLexName(x)}
    print "all_lemmas:", len(all_lemmas)
    print "all_alpha_lemmas:", len(all_alpha_lemmas)
    print "all_synsets:", len(all_synsets)
    print "all_alpha_synsets:", len(all_alpha_synsets)
    print "all_lemmas_with_single_synset:", len(all_lemmas_with_single_synset)
    print "all_lemmas_ambig_synset:", len(all_lemmas_ambig_synset)
    print "all_lemmas_with_single_synset_alpha", len(all_lemmas_with_single_synset_alpha)
    print "all_lemmas_ambig_synset_alpha", len(all_lemmas_ambig_synset_alpha)
    print "all_alpha_lemmas_has_noun", len(all_alpha_lemmas_has_noun)
    print "all_alpha_lemmas_has_noun_single_lexname", len(all_alpha_lemmas_has_noun_single_lexname)
    print "huang.intersect(all_alpha_lemmas)", len(huang_vocab.intersection(all_alpha_lemmas))
    print "manaal.intersect(all_alpha_lemmas)", len(manaal_vocab.intersection(all_alpha_lemmas))
    print "brown.intersect(all_alpha_lemmas)", len(brown_vocab.intersection(all_alpha_lemmas))
    print "huang*manaal*brown*all_alpha_lemmas", len(huang_vocab.intersection(all_alpha_lemmas, manaal_vocab, brown_vocab))
    print "huang.intersect(all_lemmas_with_single_synset_alpha)", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "manaal.intersect(all_lemmas_with_single_synset_alpha)", len(manaal_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "brown.intersect(all_lemmas_with_single_synset_alpha)", len(brown_vocab.intersection(all_lemmas_with_single_synset_alpha))
    print "huang*manaal*brown*all_lemmas_with_single_synset_alpha", len(huang_vocab.intersection(all_lemmas_with_single_synset_alpha, manaal_vocab, brown_vocab))
Example 10: convert_all_to_basic
def convert_all_to_basic(reviews):
    print("Process Started")
    print("Getting all nouns....")
    words = [s for s in wn.all_synsets(wn.NOUN)
             if (s.name().find('-') == -1) and (s.name().find('_') == -1) and len(s.name().split('.')[0]) < 12]
    print("Processing basic logic probability...")
    words2 = []
    filter_basic_logic(words, words2)
    print("Removing redundancy...")
    a = list(set(words2))
    a.sort()
    remove_unwanted(a)
    newReviews = []
    for review in reviews:
        tempReview = ""
        tokens = word_tokenize(review)
        for token in tokens:
            tempword = check_basic(token, a)
            if tempword:
                tempReview = tempReview + " " + tempword
            else:
                tempReview = tempReview + " " + token
        newReviews.append(tempReview)
    return newReviews
Example 11: wn_pos_dist
def wn_pos_dist():
    """Count the Synsets in each WordNet POS category."""
    # One-dimensional count dict with 0 as the default value:
    cats = defaultdict(int)
    # The counting loop:
    for synset in wn.all_synsets():
        cats[synset.pos] += 1
    # Return the per-POS counts so the caller can inspect or print them.
    return cats
Example 12: load_corpora
def load_corpora(self):
    print "Loading corpora..."
    pth = os.path.realpath(os.path.dirname(__file__))
    nltk.data.path.append(os.path.join(pth, "nltk_data"))
    from nltk.corpus import wordnet as wn
    self._adjectives = list(wn.all_synsets('a'))
    self._nouns = list(wn.all_synsets('n'))
    with open(os.path.join(pth, "firstnames.txt")) as fh:
        self._firstnames = fh.readlines()
    with open(os.path.join(pth, "surnames.txt")) as fh:
        self._surnames = fh.readlines()
Example 13: populate_cache
def populate_cache():
    adjectives, nouns = (set(), set())
    for wordset, kind in [
        (adjectives, wordnet.ADJ),
        (nouns, wordnet.NOUN),
    ]:
        for synset in wordnet.all_synsets(kind):
            for lemma in filter(
                lambda l: all((
                    not re.search(r'\d', l.name()),
                    l.name() not in BLACKLIST,
                    not l.name().endswith('_to'),
                    l.count() > 0,
                )), synset.lemmas()
            ):
                wordset.add(lemma.name().replace('_', ' '))
    os.mkdir(CACHE_PATH)
    for words, filename in [
        (adjectives, 'adjectives'),
        (nouns, 'nouns'),
    ]:
        with open(os.path.join(CACHE_PATH, filename), 'w') as f:
            f.writelines((u'{}\n'.format(w) for w in words))
Example 14: list_nouns
def list_nouns():
    ## TODO CREATE A SEPARATE LIST FOR NOUNS ENDING IN S
    global NOUNS
    print "[+] Creating list of nouns... (This only has to be done once)"
    ## Make list of nouns in wordnet
    NOUNS = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')}
    print " Done!"
Example 15: ex26_branchingfactor
def ex26_branchingfactor():
    from nltk.corpus import wordnet as wn
    num_synsets = 0
    num_hyponyms = 0
    for noun_synset in wn.all_synsets("n"):
        (num_hyponyms, num_synsets) = \
            branchingfactor_r(noun_synset, num_synsets, num_hyponyms)
    # use float division so the average branching factor is not truncated
    print "branching factor=", (float(num_hyponyms) / num_synsets)