

Python PlaintextCorpusReader.words Method Code Examples

This article collects typical usage examples of the Python method nltk.corpus.PlaintextCorpusReader.words. If you are unsure what PlaintextCorpusReader.words does or how to call it, the curated code samples below should help. You can also explore further usage examples of nltk.corpus.PlaintextCorpusReader itself.


The following 15 code examples of PlaintextCorpusReader.words are sorted by popularity by default. You can upvote the examples you find useful; your feedback helps surface better Python samples.
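Before diving in, here is a minimal sketch of the basic pattern all the examples share (the corpus path and file pattern below are placeholders, not taken from any of the projects):

from nltk.corpus import PlaintextCorpusReader

corpus_root = '/path/to/corpus'                  # placeholder directory of plain-text files
reader = PlaintextCorpusReader(corpus_root, r'.*\.txt')

print(reader.fileids())                          # files matched by the pattern
print(reader.words()[:10])                       # word tokens across the whole corpus
print(len(reader.words(reader.fileids()[0])))    # tokens in a single file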

Example 1: save_my_count

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
 def save_my_count(self, corpus, patt, n, filename):
     wordlists = PlaintextCorpusReader(corpus, patt)
     fileids = wordlists.fileids()
     res = []
     for id in fileids:
         words = wordlists.words(id)   # read each file's tokens once
         leng = len(words)
         wordc = len(set(words))
         wor = "=> corpus tokens: " + str(leng) + "\n"
         dis = "=> corpus token types: " + str(wordc) + "\n"
         # float() keeps the ratio from being floor-divided under Python 2
         ric = "=> ind lex richness: " + str(leng / float(wordc)) + "\n"
         res.append(dis)
         res.append(ric)
         res.append(wor)
         for word in sorted(set(words)):
             freq = words.count(word)
             f = "(" + word.lower() + "," + str(round(100 * freq / float(leng), 1)) + ")\n"
             t = "(" + word.lower() + "," + str(freq) + "/" + str(leng) + ")"
             res.append(f)
             res.append(t)
     out = open("../data/" + filename, "w")
     try:
         for t in res[:n]:
             out.write(t + "\n")
     finally:
         out.close()
Developer: camilothorne | Project: nasslli2016 | Lines: 27 | Source: lexstatistics.py
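A plausible call site for this method, assuming it lives on some statistics class (the class name LexStatistics is hypothetical; the snippet shows only the method):

stats = LexStatistics()   # hypothetical host class
stats.save_my_count('/path/to/corpus', r'.*\.txt', 100, 'counts.txt')
# writes the first 100 result lines to ../data/counts.txt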

Example 2: compare

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def compare(request):
    errors = []
    stats = []
    for x in range(1, 3):
        statistics = []  # fresh list per canto; the original reused one list across iterations
        cantoname = "canto" + str(x) + ".txt"
        w = PlaintextCorpusReader("./", cantoname)
        l_lines = len(line_tokenize(w.raw()))
        l_uwords = len(set(w.words()))
        l_words = len(w.words())
        l_sents = len(w.sents())
        l_paras = len(w.paras())
        l_linperpara = l_lines / float(l_paras)
        statistics.append(x)
        statistics.append("Number of Words - " + str(l_words))
        statistics.append("Number of Unique Words - " + str(l_uwords))
        statistics.append("Number of Sentences - " + str(l_sents))
        statistics.append("Number of Lines - " + str(l_lines))
        statistics.append("Number of Paras - " + str(l_paras))
        statistics.append("Number of Lines/Paras - " + str(l_linperpara))
        lexical_density = l_words / float(l_uwords)
        l_wordpersent = l_words / float(l_sents)
        statistics.append("Lexical Density (Total/Uniq) words - " + str(lexical_density))
        statistics.append("Words per sentence - " + str(l_wordpersent))
        stats.append(statistics)

    return render_to_response('compare.html', {'stats': stats})
Developer: prashaantt | Project: savitri-labs | Lines: 31 | Source: views.py

Example 3: stats

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def stats(request):
    errors = []
    statistics = []
    if 'q' in request.GET:
        q = request.GET['q']
        if not q:
            errors.append('Enter a Canto Number')
        else:
            cantoname = "canto" + q + ".txt"
            w = PlaintextCorpusReader("./", cantoname)
            l_lines = len(line_tokenize(w.raw()))
            l_uwords = len(set(w.words()))
            l_words = len(w.words())
            l_sents = len(w.sents())
            l_paras = len(w.paras())
            l_linperpara = l_lines / float(l_paras)
            statistics.append("Number of Words - " + str(l_words))
            statistics.append("Number of Unique Words - " + str(l_uwords))
            statistics.append("Number of Sentences - " + str(l_sents))
            statistics.append("Number of Lines - " + str(l_lines))
            statistics.append("Number of Paras - " + str(l_paras))
            statistics.append("Number of Lines/Paras - " + str(l_linperpara))
            lexical_density = l_words / float(l_uwords)
            l_wordpersent = l_words / float(l_sents)
            statistics.append("Lexical Density (Total/Uniq) words - " + str(lexical_density))
            statistics.append("Words per sentence - " + str(l_wordpersent))
            return render_to_response('stats.html', {'statistics': statistics})
    return render_to_response('stats.html', {'errors': errors})
Developer: prashaantt | Project: savitri-labs | Lines: 32 | Source: views.py

Example 4: hybrid_cfdist

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def hybrid_cfdist():
    sherlock_corpus = PlaintextCorpusReader(CORPUS_ROOT_SHERLOCK, '.*', encoding='utf-8')
    # list() so the two bigram streams can be concatenated; in NLTK 3,
    # nltk.bigrams() returns a generator rather than a list.
    sherlock_bigrams = list(nltk.bigrams(sherlock_corpus.words()))

    pokemon_corpus = PlaintextCorpusReader(CORPUS_ROOT_POKEMON, '.*', encoding='utf-8')
    pokemon_bigrams = list(nltk.bigrams(pokemon_corpus.words()))

    return nltk.ConditionalFreqDist(sherlock_bigrams + pokemon_bigrams)
Developer: mikeholler | Project: CSC499-NLP | Lines: 10 | Source: text_generation.py
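Since the returned ConditionalFreqDist maps each word to a FreqDist of its successors, it can drive a simple greedy bigram generator. A minimal sketch (the helper name and seed word are ours; the seed must occur in the corpus):

def generate_from_cfdist(cfdist, seed, length=20):
    # Repeatedly follow the most frequent successor of the current word.
    words = [seed]
    for _ in range(length):
        seed = cfdist[seed].max()
        words.append(seed)
    return ' '.join(words)

print(generate_from_cfdist(hybrid_cfdist(), 'the'))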

Example 5: corpus_metrics

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
    def corpus_metrics(self, corpus_path):
        corpus_news = PlaintextCorpusReader(corpus_path, r'.*\.txt')

        print('Corpus documents', len(corpus_news.fileids()))
        print('Train documents', len([c for c in corpus_news.fileids() if c.startswith('train')]))
        print('Dev documents', len([c for c in corpus_news.fileids() if c.startswith('dev')]))
        print('Test documents', len([c for c in corpus_news.fileids() if c.startswith('test')]))

        words = sorted(set(corpus_news.words()))
        print('Corpus different words', len(words))

        longwords = [w for w in corpus_news.words() if len(w) > 2]

        fdist = nltk.FreqDist(longwords)

        bigramController = BigramController()
        bigrams = bigramController.BuildBrigramFeatures(longwords)
        bigramController.BigramStatistics(bigrams)

        trigramdist = nltk.FreqDist(nltk.trigrams(longwords))

        #fdist.plot(50, cumulative=False)
        print(fdist.most_common(20))
        print("Trigram distribution")
        print(trigramdist.most_common(20))

        words_attack = []
        files_attack = [f for f in corpus_news.fileids()
                        if os.path.basename(os.path.normpath(f)).startswith('attack--')]
        for file in files_attack:
            for w in corpus_news.words(file):
                words_attack.append(w)

        words_nonattack = []
        files_nonattack = [f for f in corpus_news.fileids()
                           if os.path.basename(os.path.normpath(f)).startswith('nonattack--')]
        for file in files_nonattack:
            for w in corpus_news.words(file):
                words_nonattack.append(w)

        words_bag = {}
        words_bag['attack'] = words_attack
        words_bag['nonattack'] = words_nonattack
        #print(words_bag['attack'])
        cfd = nltk.ConditionalFreqDist((category, word)
                                       for category in ['attack', 'nonattack']
                                       for word in words_bag[category])
Developer: gcvalderrama | Project: Palantir | Lines: 57 | Source: custom_metrics.py

Example 6: main

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def main():
	current_directory = os.path.dirname(__file__)
	corpus_root = os.path.abspath(current_directory)
	wordlists = PlaintextCorpusReader(corpus_root, 'Islip13Rain/.*\.txt')
	wordlists.fileids()
	ClassEvent = nltk.Text(wordlists.words())
	CEWords = ["Long Island", "Weather Service", "flooding", "August", 
		"heavy rains", "Wednesday", "Suffolk County", "New York", "rainfall",
		"record"]

	# ClassEvent Statistics
	print "--------- CLASS EVENT STATISTICS -------------"
	print "ClassEvent non stopwords", non_stopword_fraction(ClassEvent)	
	print "ClassEvent WORD LENGTH DISTRIBUTIONS:"
	print_word_length_distributions(ClassEvent)
	print "ClassEvent PERCENTAGE OF WORD OCCURRENCES:"
	print_percentage_of_word_in_collection(ClassEvent, CEWords)
	
	ClassEventLettersPerWord = average_letters_per_word(ClassEvent)
	ClassEventWordsPerSent = len(wordlists.words()) / float(len(wordlists.sents()))  # float() avoids Python 2 floor division
	ClassEventARI = (4.71 * ClassEventLettersPerWord) + (0.5 * \
		ClassEventWordsPerSent) - 21.43
	
	print "Average number of letters per word", ClassEventLettersPerWord
	print "Average number of words per sentence:", ClassEventWordsPerSent
	print "Automated Readability Index:", ClassEventARI


	print 

	wordlists_event = PlaintextCorpusReader(corpus_root, "Texas_Wild_Fire/.*\.txt")
	wordlists_event.fileids()
	YourSmall = nltk.Text(wordlists_event.words())
	SmallEventWords = ["Fire", "Wildfire", "Water", "Damage", "Ground", "Burn", 
		"Town", "Heat", "Wind", "Speed", "Size", "City", "People", "Home",
		"Weather", "Debris", "Death", "Smoke", "State", "Ash"]
	

	# YourSmall statistics
	print "--------- YOUR SMALL STATISTICS --------------"
	print "Texas_Wild_Fire", non_stopword_fraction(YourSmall)
	print "YourSmall WORD LENGTH DISTRIBUTIONS:"
	print_word_length_distributions(YourSmall)
	print "YourSmall PERCENTAGE OF WORD OCCURRENCES:"
	print_percentage_of_word_in_collection(YourSmall, SmallEventWords)
	
	YourSmallLettersPerWord = average_letters_per_word(YourSmall)
	YourSmallWordsPerSent = len(wordlists_event.words()) / \
		float(len(wordlists_event.sents()))  # float() avoids Python 2 floor division
	YourSmallARI = (4.71 * YourSmallLettersPerWord) + (0.5 * \
		YourSmallWordsPerSent) - 21.43

	print "Average number of letters per word", YourSmallLettersPerWord
	print "Average number of words per sentence:", YourSmallWordsPerSent
	print "Automated Readability Index", YourSmallARI
Developer: jplahn | Project: NLP-Capstone | Lines: 57 | Source: Statistics.py
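Both halves of main() apply the standard Automated Readability Index formula, ARI = 4.71 * (letters/words) + 0.5 * (words/sentences) - 21.43, each to a different PlaintextCorpusReader corpus.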

Example 7: get_coarse_level_features

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def get_coarse_level_features(dataset, output_file):
# accessing the corpus
    corpus_root = '/home1/c/cis530/data-hw2/' 
    dataset_path = corpus_root + dataset

# Reading the files from the directories
    files = PlaintextCorpusReader(dataset_path, '.*')
    ids = files.fileids()
    stopFile = PlaintextCorpusReader(corpus_root, 'stopwlist.txt')
    stops = stopFile.words()

#Opening a file that has to be written to
    out = open(output_file, 'w')

    for i in range(len(ids)):  # the original bound of len(ids) - 1 skipped the last file
#Initializing certain variables
        tokens_count=0
        types = 0
        non_stops_count=0
        sents_count = 0
        avg_sent_len=0
        cap_count = 0

        tokens=files.words(ids[i])
#Computing Number of Tokens
        tokens_count = len(tokens)

#Computing Number of types
        types = len(set(tokens))
        non_stops=[]

#Computing Number of Content Words
        for t in tokens:
            if t not in stops:
                non_stops.append(t)
        non_stops_count = len(non_stops)

#Finding Average Sentence Length
        sent = []
        sent = files.sents(ids[i])
        sents_count = len(sent)
        sent_len=0
        for s in sent:
            sent_len = sent_len + len(s)
        avg_sent_len = sent_len/float(sents_count)

#Computing Number of Captilized Words
        for c in non_stops:
            if c.istitle():
                cap_count = cap_count+1
        current_file = dataset + '/' + ids[i]
        e = current_file.split('/')
        out.write(current_file + ' ' + e[-2] + ' tok:' + str(tokens_count) + ' typ:' +
                  str(types) + ' con:' + str(non_stops_count) + ' sen:' + str(sents_count) +
                  ' len:' + str(avg_sent_len) + ' cap:' + str(cap_count) + '\n')
        out.flush()
Developer: madhuraraju | Project: NLP_Class_Code_Samples | Lines: 57 | Source: CL_Two_Code_rmadhura.py

Example 8: loadCorpora

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def loadCorpora():

    corpus_root = '/usr/share/dict'
    wordlists = PlaintextCorpusReader(corpus_root, '.*')
    wordlists.fileids()
    wordlists.words('connectives')

    corpus_root = r"C:\corpora\penntreebank\parsed\mrg\wsj"
    file_pattern = r".*/wsj_.*\.mrg" 
    ptb = BracketParseCorpusReader(corpus_root, file_pattern)
    ptb.fileids()
    len(ptb.sents())
    ptb.sents(fileids='20/wsj_2013.mrg')[19]
Developer: AkiraKane | Project: Python | Lines: 15 | Source: c02_text_corpora.py

Example 9: get_lm_features

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def get_lm_features(dataset, output_file):      
    corpus_root = '/home1/c/cis530/data-hw2/'
    bigram_root = corpus_root + 'Language_model_set/'

    fin_files = PlaintextCorpusReader(bigram_root+'Finance/','.*')
    fin_words = list(fin_files.words())
    fin_model = NGramModel(fin_words, 2)

    health_files = PlaintextCorpusReader(bigram_root+'Health/','.*')
    health_words = list(health_files.words())
    health_model = NGramModel(health_words, 2)

    res_files = PlaintextCorpusReader(bigram_root+'Research/','.*')
    res_words = list(res_files.words())
    res_model = NGramModel(res_words, 2)

    com_files = PlaintextCorpusReader(bigram_root+'Computers_and_the_Internet/','.*')
    com_words = list(com_files.words())
    com_model = NGramModel(com_words, 2)

    test_files = PlaintextCorpusReader(corpus_root+dataset, '.*')
    ids = test_files.fileids()

    out_file = open(output_file,'w')

    for j in range(0, len(ids)):
        file_words = test_files.words(ids[j])
        current_file = dataset + '/' + ids[j]
        e = current_file.split('/')
        out_str = current_file + ' ' + e[-2]
        sum_fin = 0
        sum_health = 0
        sum_res = 0
        sum_com = 0
        text_len = len(file_words)
        for i in range(1, len(file_words)):
            sum_fin = sum_fin + math.log(fin_model.prob((file_words[i-1],), file_words[i]))
            sum_health = sum_health + math.log(health_model.prob((file_words[i-1],), file_words[i]))
            sum_res = sum_res + math.log(res_model.prob((file_words[i-1],), file_words[i]))
            sum_com = sum_com + math.log(com_model.prob((file_words[i-1],), file_words[i]))
        # Per-word normalizations, computed once per file; the original
        # recomputed them and grew out_str on every loop iteration, and its
        # final write statements were mis-indented.
        comp_fin = float(-sum_fin) * (1 / float(text_len))
        comp_health = float(-sum_health) * (1 / float(text_len))
        comp_res = float(-sum_res) * (1 / float(text_len))
        comp_com = float(-sum_com) * (1 / float(text_len))
        out_str = out_str + ' finprob:' + str(round(sum_fin, 2)) + ' hlprob:' + str(round(sum_health, 2)) + \
            ' resprob:' + str(round(sum_res, 2)) + ' coprob:' + str(round(sum_com, 2)) + \
            ' finper:' + str(round(comp_fin, 2)) + ' hlper:' + str(round(comp_health, 2)) + \
            ' resper:' + str(round(comp_res, 2)) + ' coper:' + str(round(comp_com, 2))
        out_file.write(out_str + '\n')
        out_file.flush()
Developer: madhuraraju | Project: NLP_Class_Code_Samples | Lines: 53 | Source: CL_Two_Code_rmadhura.py

Example 10: plot_cfreq

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
 def plot_cfreq(self, corpus, patt, n):
     wordlists = PlaintextCorpusReader(corpus, patt)
     # Accumulate one distribution over every file; the original rebuilt
     # (and so overwrote) the FreqDist on each loop iteration, plotting
     # only the last file's counts.
     fre = FreqDist(word.lower()
                    for id in wordlists.fileids()
                    for word in wordlists.words(id)
                    if word.isalpha())
     return fre.plot(n, cumulative=True)
Developer: camilothorne | Project: nasslli2016 | Lines: 9 | Source: lexstatistics.py

Example 11: __init__

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
class BigramModel:
        def __init__(self, category, corpus_root):
                # Plain instance attributes; the original also declared them as
                # class-level mutable lists, which was redundant (and a classic
                # Python pitfall) since __init__ rebinds them anyway.
                self.category_root = corpus_root + '/' + category
                self.files_dataset_category = PlaintextCorpusReader(self.category_root, '.*')
                self.word_list = self.files_dataset_category.words()
                # list() so the bigrams survive being stored; in NLTK 3,
                # nltk.bigrams() returns a one-shot generator.
                self.bigram = list(nltk.bigrams(self.word_list))
                self.fd = FreqDist(self.word_list)
                self.cfd = nltk.ConditionalFreqDist(self.bigram)

        def get_prob_and_per(self, word_list):
                # Return the log probability and log perplexity of word_list
                # under this bigram language model, with Laplace smoothing.
                n_types = len(set(word_list))
                n_tokens = len(word_list)
                # Initialize with the (smoothed) unigram probability of the first word.
                log_prob = math.log(self.fd[word_list[0]] + 1) - math.log(n_tokens + n_types)
                for (w1, w2) in nltk.bigrams(word_list):
                        log_prob = log_prob + math.log(self.cfd[w1][w2] + 1) - math.log(len(self.cfd[w1].keys()) + n_types)
                # Log perplexity is the negated per-token log probability.
                log_per = -log_prob / float(n_tokens)
                return log_prob, log_per
Developer: gabhi | Project: new-york-times-summarization | Lines: 33 | Source: topic-classification.py
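A quick sketch of driving this class directly (the corpus_root mirrors the directory layout of Examples 9 and 12 and is otherwise an assumption, as is the sample word list):

corpus_root = '/home1/c/cis530/data-hw2/Language_model_set'
fin_model = BigramModel('Finance', corpus_root)
log_prob, log_per = fin_model.get_prob_and_per(['the', 'market', 'fell', 'sharply'])
print(log_prob, log_per)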

Example 12: get_lm_features

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def get_lm_features(dataset, output_file):
	# Define the folder where the files are situated
	corpus_root = '/home1/c/cis530/data-hw2/' + dataset
	files_dataset = PlaintextCorpusReader(corpus_root, '.*')
	# Note: the original passed 'Computers_and_the_Internet' to res_model and
	# 'Research' to co_model, so those two feature columns were swapped.
	fin_model = BigramModel('Finance', corpus_root)
	hel_model = BigramModel('Health', corpus_root)
	res_model = BigramModel('Research', corpus_root)
	co_model = BigramModel('Computers_and_the_Internet', corpus_root)
	output = open('/home1/c/cis530/data-hw2/' + output_file, 'w')
	for fileid in files_dataset.fileids():
		# Output the docid
		output.write(dataset + '/' + fileid + ' ')
		# Output the topic_name
		topic_name = fileid.split('/')[0]
		output.write(topic_name + ' ')
		word_list = files_dataset.words(fileid)
		finprob, finper = fin_model.get_prob_and_per(word_list)
		hlprob, hlper = hel_model.get_prob_and_per(word_list)
		resprob, resper = res_model.get_prob_and_per(word_list)
		coprob, coper = co_model.get_prob_and_per(word_list)
		output.write('finprob:' + str(round(finprob, 1)) + ' ')
		output.write('hlprob:' + str(round(hlprob, 1)) + ' ')
		output.write('resprob:' + str(round(resprob, 1)) + ' ')
		output.write('coprob:' + str(round(coprob, 1)) + ' ')
		output.write('finper:' + str(round(finper, 1)) + ' ')
		output.write('hlper:' + str(round(hlper, 1)) + ' ')
		output.write('resper:' + str(round(resper, 1)) + ' ')
		output.write('coper:' + str(round(coper, 1)) + ' ')
		output.write('\n')
	output.close()
Developer: gabhi | Project: new-york-times-summarization | Lines: 33 | Source: topic-classification.py

Example 13: corpus_from_directory

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def corpus_from_directory(path, filetype='.*'):
	'''
	Make a corpus of all files in a given directory. Can limit type by passing
	the desired extension; the proper format is, e.g., '.*\.txt'
	'''
	corpus_reader = PlaintextCorpusReader(path, filetype)
	return nltk.Text(corpus_reader.words())
Developer: campustimes | Project: pnlp-final-project | Lines: 9 | Source: corpustools.py
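For example (the path and search term are placeholders):

news = corpus_from_directory('/path/to/articles', r'.*\.txt')
news.concordance('flooding')   # nltk.Text adds concordance, collocations, similar, etc.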

Example 14: prepare_pos_features

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def prepare_pos_features(Language_model_set, output_file):
    corpus_root = '/home1/c/cis530/data-hw2/' + Language_model_set
    texts = PlaintextCorpusReader(corpus_root, '.*')
    text = texts.words()
    tagged_text = nltk.pos_tag(text)
    merged_tag_text = mergeTags(tagged_text)
    lists = seperate_pos(merged_tag_text)
    # most_common(n) replaces the old NLTK idiom fdist.keys()[:n], which
    # relied on FreqDist keys being sorted by decreasing frequency.
    nouns_dist = FreqDist(lists[0])
    top_nouns = [w for w, _ in nouns_dist.most_common(200)]
    verbs_dist = FreqDist(lists[1])
    top_verbs = [w for w, _ in verbs_dist.most_common(200)]
    advs_dist = FreqDist(lists[2])
    top_advs = [w for w, _ in advs_dist.most_common(100)]
    prep_dist = FreqDist(lists[3])
    top_preps = [w for w, _ in prep_dist.most_common(100)]
    adjs_dist = FreqDist(lists[4])
    top_adjs = [w for w, _ in adjs_dist.most_common(200)]

    out = open(output_file, 'w')

    for n in top_nouns:
        out.write('NN'+ n + '\n')
    for v in top_verbs:
        out.write('VV'+ v + '\n')
    for av in top_advs:
        out.write('ADV'+ av + '\n')
    for p in top_preps:
        out.write('PREP'+ p + '\n')
    for aj in top_adjs:
        out.write('ADJ'+ aj + '\n')
Developer: madhuraraju | Project: NLP_Class_Code_Samples | Lines: 33 | Source: CL_Two_Code_rmadhura.py

Example 15: similar

# Required import: from nltk.corpus import PlaintextCorpusReader [as alias]
# Or: from nltk.corpus.PlaintextCorpusReader import words [as alias]
def similar(text, word):
    if re.match(r"^[a-zA-Z0-9_(),.]+$", text) and re.match(r"^[a-zA-Z0-9_]+$", word):
        text = '%s.txt' % text

        f = open(os.path.join(CORPUS_ROOT, text), 'r')
        source = f.read()
        f.close()

        corpus = PlaintextCorpusReader(CORPUS_ROOT, [text])
        n_text = nltk.text.Text(corpus.words(text))
        context_index = nltk.text.ContextIndex(n_text.tokens, filter=lambda x: x.isalpha(), key=lambda s: s.lower())
        word = word.lower()
        wci = context_index._word_to_contexts
        result = []

        if word in wci.conditions():
            contexts = set(wci[word])
            fd = nltk.probability.FreqDist(w for w in wci.conditions() for c in wci[w] if c in contexts and not w == word)
            words = nltk.util.tokenwrap([w for w, _ in fd.most_common(20)])

            for middle_word in words.split(' '):
                for context in contexts:
                    # re.search() returns a match object or None; the original
                    # compared the result to the string 'none' (always true) and
                    # embedded JavaScript-style /.../i delimiters in the pattern.
                    pattern = re.escape(context[0]) + r"(\W|\s)+" + re.escape(middle_word) + r"(\W|\s)+" + re.escape(context[1])
                    if re.search(pattern, source, re.IGNORECASE):
                        print(context[0], middle_word, context[1])
                        result.append({'word': word, 'context_left': context[0], 'context_right': context[1]})

        return dumps({'name': text, 'word': word, 'result': result})
Developer: osp | Project: osp.work.vj12 | Lines: 29 | Source: vj12.py


Note: The nltk.corpus.PlaintextCorpusReader.words examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and any use or distribution must follow each project's license. Please do not republish this article without permission.