

Python cmudict.dict Function Code Examples

This article collects typical usage examples of Python's nltk.corpus.cmudict.dict function. If you are wondering what dict does, how to call it, or what real-world usage looks like, the curated code examples below should help.


The following presents 15 code examples of the dict function, sorted by popularity by default.
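
Before diving in, a quick orientation: cmudict.dict() returns a plain Python dict mapping lowercase words to one or more pronunciations, each a list of ARPAbet phones whose vowel phones end in a stress digit (0, 1, or 2). A minimal sketch, assuming the corpus has been downloaded with nltk.download('cmudict'):

from nltk.corpus import cmudict

d = cmudict.dict()    # word -> list of pronunciations
print(d["python"])    # e.g. [['P', 'AY1', 'TH', 'AA0', 'N']]
# The trailing stress digits on vowel phones are what most of the
# examples below exploit for syllable counting and rhyme detection.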

Example 1: __init__

  def __init__(self, dataset="picasso2", basedir="parsed_data"):
    self.dataset = dataset
    self.basedir = basedir
    filename = "%s/%s/source/%s" % (basedir,dataset,dataset)
    self.debug("poemparser:init:dataset parsing '%s'..." % filename)

    with open("pyParser/english_words.txt") as word_file:
      self.english_words = set(word.strip().lower() for word in word_file)

    # Open and analyze the text data.
    self.unknownWords   = {}
    self.iffyWords      = {}
    self.allmatch       = {}
    self.alltokens      = self.openTokens(filename)
    self.parsedTokens   = [token for token in self.alltokens[0] if token != '-']
    self.replacedTokens = [token for token in self.alltokens[1] if token != '-']
    self.fullTokens     = [token for token in self.alltokens[2] if token != '-']
    self.tokens         = self.parsedTokens
    self.loweredTokens  = [token.lower() for token in self.replacedTokens]
    self.pos_tags       = nltk.pos_tag(self.replacedTokens)
    self.text           = nltk.Text(self.tokens)
    self.dict           = cmudict.dict() 
    self.lastspeed      = 0
    self.midiindex      = 0
    
    self.setMIDISettings(12)
    
    self.debug("poemparser:init:words %s"  % self.fullTokens)
    self.debug("poemparser:init:tokens %s" % self.tokens)
    self.debug("poemparser:init:text %s"   % self.text)
Author: kiddphunk, Project: CircosLogosParser, Lines: 30, Source: parser.py

Example 2: approx_nsyl

def approx_nsyl(word):
	"""Credit - Jason Sundram, http://runningwithdata.com/post/3576752158/w
	Return the max syllable count in the case of multiple pronunciations."""
	d = cmudict.dict()
	if word.lower() not in d:  # check the same lowercased key used below
		return 0
	return max(len([y for y in x if y[-1].isdigit()]) for x in d[word.lower()])
Author: psloomis, Project: LyricClassifier, Lines: 7, Source: generator.py
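
Note that approx_nsyl rebuilds the full pronouncing dictionary on every call, which is slow; loading it once at module level is a cheap improvement. A minimal sketch (the cached name _CMU is an assumption, not part of the original):

from nltk.corpus import cmudict

_CMU = cmudict.dict()  # load once at import time instead of per call

def approx_nsyl_cached(word):
    prons = _CMU.get(word.lower())
    if not prons:
        return 0
    # Syllables = phones carrying a stress digit.
    return max(len([p for p in pron if p[-1].isdigit()]) for pron in prons)

print(approx_nsyl_cached("pronunciation"))  # 5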

Example 3: reset_country_codes_to_emoflags

def reset_country_codes_to_emoflags(cc_path='country_codes.txt',
        irange=ET.FLAGS_RANGE, charset='utf-8'):
    '''
    Using a country code dict, set the name and syllable fields
    in a copy of emo_tuples.
    '''
    cmu_prons = cmudict.dict() # get the CMU Pronouncing Dict
    cc_dict = load_country_codes(cc_path)

    for tup in ET.EMO_TUPLES[irange.start:irange.stop]:
        cc2 = tup[ET.INDEX_ALTERNATIVES][0].strip(':').upper()
        # print(cc2, '  ', end='')
        monos, polys, names = [], [], [cc2]
        names.extend(nm for nm in tup[ET.INDEX_POLYSYLLABLES] if len(nm) > 2)
        try:
            names.extend(cc_dict[cc2])
            # print(names, file=sys.stderr)
        except KeyError:
            print("{} missing {}\n\tusing: {}".format(
                   cc2, tup, names), file=sys.stderr)
        for name in set(names):
            if sylc.syl_count(cmu_prons, name) == 1:
                monos.append(name)
            else:
                polys.append(name)
        tupal = list(tup)
        tupal[ET.INDEX_WORDSYLLABLES] = monos
        tupal[ET.INDEX_POLYSYLLABLES] = polys
        ret = tuple(tupal)
        print("    {},".format(ret), file=sys.stdout)
        # tupal[ET.INDEX_WORDSYLLABLES] =
    print()
Author: sprax, Project: python, Lines: 32, Source: emo_tuples_gen.py
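
The helper sylc.syl_count is not shown in this snippet. A hypothetical minimal equivalent, counting stress-marked vowel phones in the first listed pronunciation (an assumption about the helper's behavior, not the project's code):

def syl_count(cmu_prons, word):
    # Count phones ending in a stress digit; 0 for unknown words.
    prons = cmu_prons.get(word.lower())
    if not prons:
        return 0
    return sum(1 for phone in prons[0] if phone[-1].isdigit())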

Example 4: compile_meter_list

	def compile_meter_list(self, new_words, verbose=True):
	    # simplifies and compiles the CMU corpus info into a nested list
	    iambic = cmudict.dict()                     # connect to cmu corpus, called iambic
	    big_list = []                               # list to collect all the different versions of words and their meter
	    for word in new_words:                      # get word from list of clean words
	        syl_num = sylco([word])
	        word_n_versions_list = []               # list has each word and the different versions
	        word_n_versions_list.append(word)       # add word
	        versions_list = []                      # list of all diff versions
	        try:                                    # if word is in corpus
	            for n,x in enumerate(iambic[word.lower()]): # get versions for each word
	                version = []                    # list for each version
	                version.append(word+str(n))     # add word+version
	                meter_list = []                 # list holds word version's meter
	                for y in x:                     # for word in cmu-dict sent
	                    for char in y:              # for character in word
	                        if char.isdigit() == True: # if the char is a number
	                            meter_list.append(int(char)) # add number to meter
	                version.append(meter_list)      # add meter to the word version
	                versions_list.append(version)   # add all the versions to one list
	            word_n_versions_list.append(versions_list) # add list of diff versions to word and versions list
	            big_list.append(word_n_versions_list)       
	        except KeyError:                        # if word isn't in corpus
	            version = []                        # empty version
	            version.append(word+str(0))         # add word1
	            meter_list = []                     # empty meter list
	            if len(syl_num) == 1:
	                for syl in range(syl_num[0]):          # for each syllable...
	                    meter_list.append(-1)           # add -1 (unknown stress) to meter_list
	                version.append(meter_list)          # add empty meter list to version
	                versions_list.append(version)       # add version w/ word1 to versions list
	                word_n_versions_list.append(versions_list) # add list of diff versions to word and versions list
	                big_list.append(word_n_versions_list) # adds word and versions to big list
	    return big_list
Author: akodate, Project: DSI-SF-2-akodate, Lines: 34, Source: functions.py
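
The heart of compile_meter_list is extracting one list of stress digits per CMU pronunciation; a compact standalone sketch of that step may make the nested loops above easier to follow:

from nltk.corpus import cmudict

iambic = cmudict.dict()

def meters(word):
    # One list of stress digits (0/1/2) per pronunciation of the word.
    return [[int(ch) for phone in pron for ch in phone if ch.isdigit()]
            for pron in iambic.get(word.lower(), [])]

print(meters("before"))  # e.g. [[0, 1]] -- unstressed/stressed, an iamb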

Example 5: __compliant_haiku

 def __compliant_haiku(self, haiku_source):
     """Ensure that newlines remain and all
     other punctuation has been stripped"""
     pron_dict = cmudict.dict()  # avoid shadowing the built-in dict
     haiku_lines = haiku_source.splitlines()
     syllables = []
     for line in haiku_lines:
         if line == "":
             continue
         sal=[]
         for word in line.split(" "):
             sal.append(len([x for x in pron_dict[word][0] if x[-1].isdigit()]))
         syllables.append(sum(sal))
     pattern = [5,7,5]
     if len(syllables) % 3 == 0:
         while len(syllables) > 0:
             if syllables[:3] == pattern:
                 for x in range(2,-1,-1):
                     syllables.pop(x)
             else:
                 return False
     else:
         return False
     return True
Author: Reboare, Project: py-brainfuck, Lines: 26, Source: cherryblossom.py
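
A usage sketch of the same 5-7-5 check, reduced to a standalone line counter (like the method above, it would raise KeyError on words missing from cmudict):

from nltk.corpus import cmudict

d = cmudict.dict()

def line_syllables(line):
    return sum(len([ph for ph in d[w][0] if ph[-1].isdigit()])
               for w in line.lower().split())

haiku = "an old silent pond\na frog jumps into the pond\nsplash silence again"
print([line_syllables(l) for l in haiku.splitlines()])  # [5, 7, 5]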

Example 6: __init__

 def __init__(self,text):
     # Initialize vars
     self.sent_count = 0
     self.word_count = 0
     self.syll_count = 0
     self.cmu = cmudict.dict()
     self.processText(text)
Author: hshore29, Project: ListStats, Lines: 7, Source: readability.py
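
These three counters are the standard ingredients of readability formulas. For reference, Flesch Reading Ease combines them like this (a sketch of one plausible use, not necessarily this author's implementation):

def flesch_reading_ease(sent_count, word_count, syll_count):
    # Standard Flesch formula; higher scores indicate easier text.
    return (206.835
            - 1.015 * (word_count / float(sent_count))
            - 84.6 * (syll_count / float(word_count)))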

Example 7: make_cmu_wordlist

def make_cmu_wordlist():
    """
    Strip the CMU Pronunciation Dictionary of accent marks.

    Add '$' to the end of strings (for markov chain use).

    Pickle and dump to 'cmu.p'.
    """
    d = cmudict.dict()
    pronunciation_list = d.values()

    edited_list = []
    for entry in pronunciation_list:
        for word in entry:
            edited_word = ["#"]  # '#' marks the start of a word
            for phone in word:
                # remove stress digits (the "accent marks")
                edited_word.append(phone.rstrip('0123456789'))

            # Use '$' to mark the end of words
            edited_word.append('$')
            edited_list.append(edited_word)

#    with open('wordlists/cmu.p', 'w') as outfile:
#        pickle.dump(edited_list, outfile)
    
    return edited_list
Author: samzhang111, Project: gibberish, Lines: 27, Source: wordlist.py

Example 8: group_rhyming_tweets

def group_rhyming_tweets(filtered_tweet_list):
    """groups rhyming tweets into lists, then returns a list containing those lists. lists are sorted so that the list with the most rhyming words
    is first in the list."""
    copy_filtered_tweet_list = list(filtered_tweet_list)
    dictionary = cmudict.dict()
    grouped_rhyming_tweets = []
    index = 0
    while (
        index < len(copy_filtered_tweet_list) - 1
    ):  # don't need to check last element for rhymes against other words b/c all pairs of words checked already by that point
        rhyme_list = [copy_filtered_tweet_list[index]]
        i = index + 1
        while i < len(copy_filtered_tweet_list):
            if (
                do_sentences_rhyme(copy_filtered_tweet_list[index], copy_filtered_tweet_list[i], dictionary)
                or sentence_rhyme_score(copy_filtered_tweet_list[index], copy_filtered_tweet_list[i]) > 4
            ):
                rhyme_list.append(copy_filtered_tweet_list[i])
                copy_filtered_tweet_list.pop(i)
                i = i - 1
            i = i + 1
        rhyme_list = list(set(rhyme_list))  # remove non-unique entries by converting to a set and back again
        grouped_rhyming_tweets.append(rhyme_list)
        index = index + 1
    # grouped_rhyming_tweets = sorted(grouped_rhyming_tweets, key = len, reverse = True)
    grouped_rhyming_tweets = [i for i in grouped_rhyming_tweets if len(i) > 1]
    return grouped_rhyming_tweets
Author: hdavidzhu, Project: yungtweetzy, Lines: 27, Source: private_twitter_test.py
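
does_rhyme and sentence_rhyme_score come from the same project and are not shown here. A hypothetical minimal stand-in for the word-level check, treating two words as rhyming when any pair of their pronunciations shares the same final `level` phones:

def does_rhyme(word1, word2, level, dictionary):
    # An assumption about the helper's behavior, not the project's code.
    prons1 = dictionary.get(word1.lower(), [])
    prons2 = dictionary.get(word2.lower(), [])
    return any(p1[-level:] == p2[-level:]
               for p1 in prons1 for p2 in prons2)

# does_rhyme('cat', 'bat', 2, cmudict.dict())  -> True  (both end ...AE1 T)
# does_rhyme('cat', 'dog', 2, cmudict.dict())  -> False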

Example 9: __init__

  def __init__(self):

    # generate n2w 
    self.n2w = gen_n2w()

    # syllable dict
    self.cmu = cmudict.dict()
Author: cirlabs, Project: tweet-music, Lines: 7, Source: haiku.py

Example 10: on_load

    def on_load(self):
        print "Loading: " + self.__class__.__name__
        wd = self.context.getWorkingDir()
        nltk.data.path.append(wd + "nltk_data")

        self.d = cmudict.dict()
Author: mikolajb, Project: pumpkin, Lines: 7, Source: filterhaikus.py

Example 11: parse_sentence

def parse_sentence(sent, syl=partial(syllabify, English),
                   pron_dict=cmudict.dict()):
    sent = sent.strip()
    if not len(sent):
        return
    tokens = list(filter(len, map(preprocess, sent.split())))
    phonemes = (map(syl, pron_dict[t]) for t in tokens)

    nsyllables = set()
    final_sounds = set()
    for words in product(*phonemes):
        if not len(words):
            return

        # Count the number of syllables and extract the stress pattern.
        stress, syllables = zip(*((s[0], s[1:]) for w in words for s in w))

        # Compute the final sound.
        final_syllable = syllables[-1]
        if len(final_syllable[2]):
            final_sound = "_".join(map("_".join, final_syllable[1:]))
        elif len(final_syllable[0]):
            final_sound = "{0}_{1}".format(final_syllable[0][-1],
                                           "_".join(final_syllable[1]))
        else:
            final_sound = "_".join(final_syllable[1])

        # Update the possible versions for this sentence.
        nsyllables.add(len(stress))
        final_sounds.add(final_sound + "_{0}".format(int(stress[-1] > 0)))

    return nsyllables, final_sounds, [tokens[-1]]
Author: dfm, Project: twitterick, Lines: 32, Source: lang.py
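
Note the pron_dict=cmudict.dict() default argument: Python evaluates default values once, at function definition time, so the dictionary is built when the module is imported and the same object is reused across calls. The same caching idiom in isolation (lookup is a hypothetical name):

from nltk.corpus import cmudict

def lookup(word, pron_dict=cmudict.dict()):  # evaluated once, at import
    return pron_dict.get(word.lower(), [])

print(lookup("twitter"))  # repeated calls reuse the same dict object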

Example 12: stress

	def stress(self,bysentence=False):
		"""
		tokenizes (I guess) the words in self.text by the stress pattern in each of the words.
		"""
		vowels = ['A','E','I','O','U']
		possible_stresses = ['1','2','0']
		totaldic = cmudict.dict()
		def gen_stress(stripped_text):
			stress_list = []
			for word in stripped_text.lower().split():
				try:
					stress = str()
					phonemized = totaldic[word][0]
					for phoneme in phonemized:
						for stresser in possible_stresses:
							if stresser in phoneme:
								stress += stresser
					for index, sound in enumerate(phonemized[len(phonemized)-2:len(phonemized)]):
						for vowel in vowels:
							if vowel in sound:
								stress_list.append([word,stress,[index, sound],phonemized,len(phonemized)])
				except KeyError:
					# print("{} couldn't be found".format(word))
					pass
			return stress_list

		if bysentence:
			sentences = PunktSentenceTokenizer().tokenize(self.text)
			stress_by_sentence = [sentence.translate(string.maketrans("",""), string.punctuation) for sentence in sentences]
			return [gen_stress(sentence) for sentence in stress_by_sentence]

		elif not bysentence:
			stress_total = self.text.translate(string.maketrans("",""), string.punctuation) 
			return gen_stress(stress_total)
Author: dean-shaff, Project: poems, Lines: 34, Source: text_generator.py

Example 13: fix_db

def fix_db():

    print "* Executing database FIX procedure..."

    # connect to db
    mongodb_url = os.getenv("OPENSHIFT_MONGODB_DB_URL")
    client = pym.MongoClient(mongodb_url)
    db = client["shalk"]
    coll = db["ngrams"]

    base_data_dir = os.getenv("OPENSHIFT_DATA_DIR")
    if not base_data_dir:
        base_data_dir = "../data/"

    # initialize cmu dict
    nltk.data.path = ["{0}nltk/".format(base_data_dir)]
    cdict = cmudict.dict()

    count = 0
    upcount = 0
    mod = 100

    # iterate over all docs that need fixing
    orlist = [
        {"syllables": {"$exists": False}},
        {"rand": {"$exists": False}},
        {"type": {"$exists": False}},
        {"rhyme": {"$exists": False}},
    ]
    ngrams = coll.find({"$or": orlist})
    total = ngrams.count()

    for ngram in ngrams:
        upngram = False
        lastword = get_last_word(ngram)

        if "syllables" not in ngram:
            upngram = True
            ngram["syllables"] = count_syllables(lastword, cdict)
        if "rand" not in ngram:
            upngram = True
            ngram["rand"] = random.random()
        if "rhyme" not in ngram:
            upngram = True
            ngram["rhyme"] = get_rhyme(lastword, cdict)

        if not upngram:
            count += 1
            continue

        update_ngram(ngram, db)

        upcount += 1
        count += 1
        if count % mod == 0:
            print "- {0} out of {1} analysed! Docs updated: {2}".format(count, total, upcount)
            sys.stdout.flush()

    print "* Database FIX procedure finished!"
Author: helderm, Project: shalk, Lines: 59, Source: dbload.py
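
count_syllables and get_rhyme are project helpers not shown here. A common way to build a rhyme key from cmudict, which get_rhyme plausibly resembles, is to keep the phones from the last stressed vowel onward (a hedged sketch, not the project's code):

def get_rhyme(word, cdict):
    # Phones from the last primary/secondary-stressed vowel to the end,
    # joined into one comparable key; None for out-of-dictionary words.
    prons = cdict.get(word.lower())
    if not prons:
        return None
    phones = prons[0]
    for i in range(len(phones) - 1, -1, -1):
        if phones[i][-1] in "12":
            return "-".join(phones[i:])
    return "-".join(phones)  # no stressed vowel found

# get_rhyme('tower', cmudict.dict()) -> 'AW1-ER0'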

Example 14: load_pronunciations

def load_pronunciations(pronun_dictionary_name='cmudict', stress='unstressed'):
    """ note that we only support cmudict from nltk """
    if stress not in STRESS_OPTIONS:
        raise TypeError

    try: cmu = cmudict.dict()
    except (LookupError, AttributeError):
        cmu = load_cmu_pickle()
Author: kinguistics, Project: pyt2p, Lines: 8, Source: pronunciations.py

Example 15: does_rhyme_unit_test

def does_rhyme_unit_test():    
    dictionary = cmudict.dict()
    print does_rhyme('lol','bol',2,dictionary)  
    print does_rhyme('cat','dog',2,dictionary)
    print does_rhyme('cat','bat',2,dictionary)
    print does_rhyme('cat','tot',2,dictionary)
    print does_rhyme('hello','yellow',2,dictionary)
Author: dennis-chen, Project: YUNGTWEETZY, Lines: 8, Source: rhyming_tweets.py


Note: the nltk.corpus.cmudict.dict examples in this article were collected by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are selected from projects contributed by various developers; copyright in the source code remains with the original authors. Consult each project's license before reusing or redistributing, and do not reproduce without permission.