

Python PorterStemmer.stem Method Code Examples

This article collects typical usage examples of the Python method nltk.stem.porter.PorterStemmer.stem. If you are wondering how to call PorterStemmer.stem, how it is used in practice, or what real-world code that uses it looks like, the curated examples below should help. You can also explore further usage examples of the containing class, nltk.stem.porter.PorterStemmer.


The following section shows 15 code examples of the PorterStemmer.stem method, sorted by popularity by default.
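Before the project snippets, here is a minimal, self-contained sketch (not drawn from any of the projects below) of what the method does: PorterStemmer.stem takes a single token and returns its Porter stem, so running text is normally tokenized first.

# Minimal illustrative sketch of nltk.stem.porter.PorterStemmer.stem
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()
print(stemmer.stem("running"))   # 'run'
print(stemmer.stem("ponies"))    # 'poni'
# stem() operates on single words, so tokenize sentences before stemming
print([stemmer.stem(w) for w in "the cats were running".split()])
# ['the', 'cat', 'were', 'run']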

Example 1: parse_questions

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
    def parse_questions(self):
        stemmer = PorterStemmer()
        tokenizer = RegexpTokenizer(r'\w+')
        for questions_key in self.rawSamples:
            # Stem the Question Text
            question_text = self.rawSamples[questions_key][0]
            words_array = tokenizer.tokenize(question_text)
            question_text = ""
            for word in words_array:
                if word.isnumeric():
                    continue
                if word not in text.ENGLISH_STOP_WORDS:
                    word = stemmer.stem(word)
                question_text += (word + " ")
            self.rawSamples[questions_key][0] = question_text

            # Stem the topic names
            topics_text = self.rawSamples[questions_key][2]
            words_array = tokenizer.tokenize(topics_text)
            topics_text = ""
            for word in words_array:
                if word.isnumeric():
                    continue
                if word not in text.ENGLISH_STOP_WORDS:
                    word = stemmer.stem(word)
                topics_text += (word + " ")
            self.rawSamples[questions_key][2] = topics_text
Developer: suket22, Project: CS246, Lines: 31, Source: LoadData.py

Example 2: extract_entities

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def extract_entities(doc):
    print 'extracting entities from %s...' % doc.getFilename()
    nps = list(set([re.sub(' \.', '', re.sub(' -[A-Z]{3}-', '', np).lower()) for np in doc.getAllNodesOfType('NP')]))
    p = PorterStemmer()
    entities = []
    for np in nps:
        try:
            response = json.loads(requests.get(host+'select', params={'q': 'wam:[50 TO 100] AND iscontent:true AND lang:en AND (title_en:"%s" OR redirect_titles_mv_en:"%s")' % (np, np), 'fl': 'title_en,redirect_titles_mv_en', 'wt': 'json'}).content)
        except requests.exceptions.ConnectionError:
            while True:
                time.sleep(15)
                print 'retrying connection...'
                try:
                    response = json.loads(requests.get(host+'select', params={'q': 'wam:[50 TO 100] AND iscontent:true AND lang:en AND (title_en:"%s" OR redirect_titles_mv_en:"%s")' % (np, np), 'fl': 'title_en,redirect_titles_mv_en', 'wt': 'json'}).content)
                    break
                except requests.exceptions.ConnectionError:
                    continue
        docs = response[u'response'][u'docs']
        if len(docs) > 0:
            titles = [docs[0][u'title_en']] + docs[0].get(u'redirect_titles_mv_en', [])
        else:
            titles = []
        if len(titles) > 0:
            titles = [' '.join([p.stem(w.lower()) for w in t.split(' ')]) for t in titles]
        stem_np = ' '.join([p.stem(w) for w in np.split(' ')])
        for title in titles:
            if stem_np == title:
                entities.append(np)
                print np
                break
    #print doc.getFilename(), entities
    return (doc.getFilename(), entities)
Developer: tristaneuan, Project: wikia-nlp, Lines: 34, Source: video-entity-harvester.py

Example 3: AddTopicUnigram

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
	def AddTopicUnigram(self, feaName, comName, data=None):
		# need mapping first
		if data is None:
			data = self._data
			
		for i in range(len(data)):	
			t_bigram = self.getEssayCollocation(data, i)
			
			t_uni = list()
			for (a, b) in t_bigram:
				t_uni.append(a)
				t_uni.append(b)
			t_uni = set(t_uni)
			
			comment = data[i][comName]
			tokens = nltk.wordpunct_tokenize(comment)
			tokens = [word.lower() for word in tokens]
		
			#stemming
			if self._stemoption ==True:
				st = PorterStemmer()
				tokens = [st.stem(t) for t in tokens]
				t_uni  = set([st.stem(t) for t in list(t_uni)])
			shared = [w for w in tokens if w in t_uni]
			#normalized
			data[i][feaName] = float(len(shared))/(len(tokens)+0.00001)
Developer: wencanluo, Project: Summarization, Lines: 28, Source: OrigReader.py

Example 4: compare_english_simple

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def compare_english_simple(article_title):
    """Given a title of an article, returns the number of tokens, types, and stems
    in both the English version and the simple English version."""
    english = extract_wikipedia_page(article_title, "en")
    simple = extract_wikipedia_page(article_title, "simple")
    num_tokens_english = len(english)
    num_tokens_simple = len(simple)
    types_english = count_words(get_words(english))
    types_simple = count_words(get_words(simple))
    
    porter_stemmer = PorterStemmer()
    
    stem_english = defaultdict(int)
    stem_simple = defaultdict(int)
    for key in types_english.keys():
        stem_english[porter_stemmer.stem(key)] += 1
    for key in types_simple.keys():
        stem_simple[porter_stemmer.stem(key)] += 1
    
    print ("Number of Tokens in English " + article_title + ": %d" % num_tokens_english)
    print ("Number of Tokens in Simple English " + article_title + ": %d" % num_tokens_simple)
    print ("Number of Types in English " + article_title + ": %d" % len(types_english))
    print ("Number of Types in Simple English " + article_title + ": %d" % len(types_simple))
    print ("Number of Stems in English " + article_title + ": %d" % len(stem_english))
    print ("Number of Stems in Simple English " + article_title + ": %d" % len(stem_simple))
Developer: slee17, Project: NLP, Lines: 27, Source: wikipedia.py

Example 5: getDomainUnigram

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
	def getDomainUnigram(self, directory = None):		
		collocations = set()  #collocation items
		ewordlists = list() #list of lists of words
		
		#extract words from essays
		if directory is not None:
			doclist = os.listdir(directory)
			for essay in doclist:
				dir_essay  = directory+'/'+essay
				etext = open(dir_essay,'r').read()
				tokens = nltk.wordpunct_tokenize(etext)
				tokens = [word.lower() for word in tokens]
				#stemming
				if self._stemoption ==True:
					st = PorterStemmer()
					tokens = [st.stem(t) for t in tokens]
				
				#extract the collocation for the given essay
				e_bigram = set(Mytext(tokens).collocations())
				collocations = collocations | e_bigram
				ewordlists.append(tokens)
				
		else: # using the mapped essays to calculate the candidate bigrams
			# need to call the mapessay function first
			for ins in self._data:
				if ins['essay'] is not None:
					etext = open(ins['essay'],'r').read()
					tokens = nltk.wordpunct_tokenize(etext)
					tokens = [word.lower() for word in tokens]
					#stemming
					if self._stemoption ==True:
						st = PorterStemmer()
						tokens = [st.stem(t) for t in tokens]
				
					#extract the collocation for the given essay
					e_bigram = set(Mytext(tokens).collocations())
					collocations = collocations | e_bigram
					ewordlists.append(tokens)
		
		#get collection of all essays under the specified directory / associated essays
		collection_text = TextCollection(ewordlists)
		
		itemlist = list()
		for (a, b) in collocations:
			itemlist.append(a)
			itemlist.append(b)
			
		itemlist = list(set(itemlist))	
		
		word_idf = []
		for i in range(len(itemlist)):
			word_idf.append((collection_text.idf(itemlist[i]), itemlist[i]))	
		
		word_idf = sorted(word_idf, key = operator.itemgetter(0))
		ave = 0
		if len(word_idf)!=0:
			ave = sum(map(operator.itemgetter(0), word_idf)) / len(word_idf)
			
		wlist =  [j for (i, j) in word_idf if i<ave]				
		return wlist
Developer: wencanluo, Project: Summarization, Lines: 62, Source: OrigReader.py

Example 6: __init__

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
class PostProcessor:
	def __init__(self):
		"""Loads in Ed and Olivier's domainRules.json file, now converted to a big (7k+ entry) dict object"""
		#import domainRules.json
		from domain_rules import domain_rules
		from tldextract.tldextract import extract
		self.extract = extract
		from nltk.stem.porter import PorterStemmer as PorterStemmer
		self.domain_rules = domain_rules
		#create stemmer
		self.Stemmer = PorterStemmer()
		
	def rerank(self, url, text, results):
		"""Processes classified results"""
		
		#check if the domain exists in domainrules
		domain = self.extract(url)
		domain = domain.domain + "." + domain.suffix
		
		print "Extracted domain: {0}".format(domain)
		
		if domain in self.domain_rules:
			print "found domain"
			if "__ANY" in self.domain_rules[domain]:
				categories = self.domain_rules[domain]['__ANY']
				for cat in categories:
					#stem it
					matchers = [self.Stemmer.stem(cat)]
					if "-" in matchers[0]:
						matchers.append(matchers[0].replace("-", "_"))
					for matcher in matchers:
						for x in range(len(results)):
							print "comparing {0} to {1}".format(matcher, results[x][0])
							if matcher.lower() in results[x][0].lower():
								print "{0} with score {1} contains {2}".format(results[x][0], results[x][1], matcher)
								results[x][1]  = results[x][1] + 1
								print "score is now {0}".format(results[x][1])
		else:
			print "augmenting common words"
			#check for common words
			words = defaultdict(int)
			for result in results:
				tokens = re.findall("[a-z]+", result[0].lower())
				for token in tokens:
					words[token] += 1
			
			#remove single entries
			for k,v in words.iteritems():
				if v > 1:
					for x in range(len(results)):
						matchers = [self.Stemmer.stem(k)]
						if "-" in matchers[0]:
							matchers.append(matchers[0].replace("-", "_"))
						for matcher in matchers:
							if matcher.lower() in results[x][0].lower():
								print "{0} with score {1} contains {2} which has score {3}".format(results[x][0], results[x][1], matcher, v)
								results[x][1] = results[x][1] + v
								print "score is now {0}".format(results[x][1])
		
		return sorted(results, key=lambda x:x[1], reverse=True)
Developer: AshBT, Project: firefox-machine-learning, Lines: 62, Source: ontologyrefactor.py

Example 7: extractFeatures

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def extractFeatures(dataSet):
    vector1, vector2 = list(), list()
    
    stemmer = PorterStemmer()
    # Produces list of all unique word stems in the titles in the dataset
    wordBag = list({stemmer.stem(word) for entry in dataSet for word in entry[2].strip().split(" ") if not word in stopwords.words('english')})


    for entry in dataSet:
        genre, isbn, title, authors = entry[0], entry[1].strip(), entry[2].strip(), entry[3].strip()

        wordList, authorList = [word for word in title.split(" ")], [author.strip() for author in authors.split(";")]
        sortedWords = sorted(wordList, key = lambda x: -1*len(x))
        nonStopWords = [word for word in sortedWords if not word in stopwords.words('english')]
        stemmedWords = [stemmer.stem(word) for word in nonStopWords]

        # Quantitative data about the title
        shortestWord = len(nonStopWords[-1])
        longestWord = len(nonStopWords[0])
        meanWord = sum([len(word) for word in nonStopWords])/len(nonStopWords)
        wordSD = (sum([(len(word)-meanWord)**2 for word in nonStopWords])/len(nonStopWords))**.5

        vector1.append([(len(authorList), len(wordList), longestWord, shortestWord, meanWord, wordSD), genre])
        
        # Creates a vector storing whether a word in a dataset occurred in the title
        occurrences = tuple(1 if word in stemmedWords else 0 for word in wordBag)
        
        vector2.append([occurrences, genre])

    return (vector1,vector2)
Developer: fabianvf, Project: k-fold-cross-validation, Lines: 32, Source: main.py

Example 8: search

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def search(ngrams, index, path, counts, id):

    print 'Searching {}'.format(path.split('/')[-1])

    # If 'Graph!' button was hit with nothing in box
    if ngrams == '':
        return None

    if len(ngrams) > 1:
        ngrams = ngrams.replace(', ', ',').encode('utf-8').lower().split(',')
    else:
        ngrams = ngrams.encode('utf-8').lower()

    ngram_count = {ngram: defaultdict(int) for ngram in ngrams}
    stemmer = PorterStemmer()

    for ngram in ngrams:

        transcripts = list()

        for word in ngram.split():

            # Get stem of word
            word = stemmer.stem(word)

            try:
                # Get set of books the word appears in
                transcripts.append(set([posting[0] for posting in index[word]]))
            except:
                # If the word is not in the index
                pass

        # Get the set of transcripts in which all words in the ngram appear
        transcripts = set.intersection(*transcripts) if len(transcripts) > 0 else set()

        for transcript in transcripts:

            year = int(transcript.split('-')[1])
            month = int(transcript.split('-')[2])
            day = int(transcript.split('-')[3])
            date = datetime(year, month, day)
            locs = []

            # For each transcript, get all of the locations of where the words in the ngram appear
            for word in ngram.split():
                word = stemmer.stem(word)
                locs.extend([posting[1] for posting in index[word] if posting[0] == transcript])

            # Check if the words are next to each other
            # e.g. ngram = 'very high profit margin' and the positions of the words are [[2,10] [3], [4,8,12,29], [5]]
            # This line of code will shift the position of each word over by its distance from the
            # beginning of the ngram to produce new positions [[2,10], [2], [2,6,10,29], [2]]
            # Then I take the intersection of these positions -- if it's not empty,
            # then the ngram appears in the transcript
            locs = [set([int(pos) - i for pos in loc]) for i, loc in enumerate(locs)]
            ngram_count[ngram][date] += len(set.intersection(*locs))

    counts[id] = ngram_count
    print 'Finished searching {}'.format(path.split('/')[-1])
Developer: trevorlindsay, Project: earnings-calls, Lines: 61, Source: search.py

Example 9: stem

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def stem(ts):
	global stemmer
	if stemmer is None:
		stemmer = PorterStemmer()
	if type(ts) is list:
		return [stemmer.stem(x) for x in ts]
	else:
		return stemmer.stem(ts)
Developer: mattea, Project: mattea-utils, Lines: 10, Source: parser.py

Example 10: get_bleu_similarity

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def get_bleu_similarity(reference_answers, student_answer):
	porter_stemmer = PorterStemmer()
	reference_answers_tokens = []
	for answer in reference_answers:
		reference_answers_tokens.append(map(lambda x: str(porter_stemmer.stem(x)), answer.split()))
	student_answer = map(lambda x: str(porter_stemmer.stem(x)), student_answer.split())
	weights = [0.25, 0.25]
	return bleu(student_answer,reference_answers_tokens, weights)
Developer: dubstack, Project: asag, Lines: 10, Source: extract_features.py

Example 11: PropertyFinder

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
class PropertyFinder(object):
    def __init__(self):
        self._stemmer = PorterStemmer()

    def __get_property_string_forms(self, property_subtree):
        words = stopwords.words('english')

        property_string_forms = set()
        property_string_forms.add((' '.join(property_subtree.leaves())).lower())
        property_string_forms.add((' '.join([self._stemmer.stem(word) for word in property_subtree.leaves()])).lower())
        property_string_forms.add((' '.join([word for word in property_subtree.leaves() if word not in words])).lower())
        property_string_forms.add((' '.join([self._stemmer.stem(word) for word in property_subtree.leaves() if word not in words])).lower())

        return property_string_forms

    def __fetch_from_wikibase(self, property_string):
        labels = DataBase().search_properties_name(property_string)
        if labels is None:
            return []
        return [label.lower() for label in labels]

    def __fetch_synonyms_and_hypernyms(self, property_string):
        words = set()
        synsets = wordnet.synsets(property_string)
        for synset in synsets:
            words.update([lemma.replace('_', ' ').lower() for lemma in synset.lemma_names()])
            for hypernym in synset.hypernyms():
                words.update([lemma.replace('_', ' ').lower() for lemma in hypernym.lemma_names()])
        return words

    def find_candidates(self, property_subtree):
        if not isinstance(property_subtree, ParentedTree):
            raise AttributeError

        candidates = set(self.__get_property_string_forms(property_subtree))

        new_candidates = set()
        for candidate in candidates:
            for label in self.__fetch_from_wikibase(candidate):
                new_candidates.add(label)
        candidates.update(new_candidates)

        new_candidates = set()
        for candidate in candidates:
            new_candidates.update(self.__fetch_synonyms_and_hypernyms(candidate))
        candidates.update(new_candidates)

        new_candidates = set()
        for candidate in candidates:
            for POS in [wordnet.ADJ, wordnet.ADV, wordnet.NOUN, wordnet.VERB]:
                morphy = wordnet.morphy(candidate, POS)
                if morphy is not None:
                    new_candidates.add(morphy)
        candidates.update(new_candidates)

        return candidates
Developer: EIFSDB, Project: search-engine, Lines: 58, Source: property_finder.py

Example 12: __weight_tokens

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
    def __weight_tokens(self, mid, nps, sentences, sent_id):
        st          = PorterStemmer()
        sent_target = sentences[sent_id]
        token_id    = [idx for idx, token in enumerate(sent_target.strip().split(" ")) if mid in token][0]

        sent_lengths= [len(s.split(" ")) for s in sentences]

        nps_base = {np:" ".join(st.stem(token) for token in np.split(" ")) for np in nps}
        nps_proc = {}

        for sent_idx, sent in enumerate(sentences):
            sent_stem = " ".join(st.stem(token) for token in sent.split(" "))
            for np_ori, np in nps_base.iteritems():
                if np_ori not in nps_proc: nps_proc[np_ori] = {}

                if "dist_sent" not in nps_proc[np_ori] or abs(sent_idx - sent_id) < nps_proc[np_ori]["dist_sent"]:
                    #always update the info
                    if np not in sent_stem: 
                        continue
                    np_idx      = sent_stem.rindex(np)
                    np_token_idx= len(sent_target[:np_idx].strip().split(" "))
                    dist_start  = len(sent_stem[:np_idx].strip().split(" "))
                    dist_end    = len(sent_stem[np_idx+len(np):].strip().split(" "))

                    dist_sent   = abs(sent_idx - sent_id)
                    dist_token  = -1

                    if dist_sent == 0:
                        if mid in np_ori:
                            dist_token = 0
                        elif np_token_idx < token_id:
                            dist_token = token_id - np_token_idx - (len(np.split(" ")) - 1) - 1
                        elif np_token_idx > token_id:
                            dist_token = np_token_idx - token_id - 1
                    elif sent_idx < sent_id: 
                        dist_token = dist_end + sum(sent_lengths[sent_idx+1:sent_id]) + token_id
                    elif sent_idx > sent_id:
                        dist_token = (len(sent_target.strip().split(" "))-1-token_id) + sum(sent_lengths[sent_id+1:sent_idx]) + dist_start

                    nps_proc[np_ori]["dist_sent"]  = dist_sent
                    nps_proc[np_ori]["dist_token"] = dist_token

                np_count = sent_stem.count(np)
                nps_proc[np_ori]["tf"] = (nps_proc[np_ori].get("tf") or 0) + np_count

        nps_weight = {}
        for np, vals in nps_proc.iteritems():
            term1 = self.__alpha * self.__gaussian_weight(vals["dist_token"], self.__var_d)
            term2 = self.__beta  * self.__gaussian_weight(vals["dist_sent"],  self.__var_s)
            term3 = self.__gamma * vals["tf"]
            nps_weight[np] = (term1 + term2 + term3) / (self.__alpha + self.__beta + self.__gamma)
        return nps_weight
Developer: frozstone, Project: concept, Lines: 54, Source: SentenceParser.py

Example 13: preProcessing

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
	def preProcessing(self,raw,fileName):
		cachedStopWords = stopwords.words("english")
		stemmer = PorterStemmer()
		text = ' '.join([word for word in raw.split() if word not in cachedStopWords])
		tokens = nltk.word_tokenize(text.lower())
		stemmed = []
		directory = os.getcwd()+"/pre-process/" 
		if not os.path.exists(directory):
			os.makedirs(directory)
		test = open(directory+re.sub('\.htm$', '', fileName)+".txt","w")
		for item in tokens:
			stemmed.append(stemmer.stem(item))
			test.write(stemmer.stem(item)+' ')
		test.close()
		return stemmed
Developer: pranavbahl2308, Project: VectorSpaceModel, Lines: 17, Source: IR.py

Example 14: main

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
def main():

    rake=RAKE.Rake('SmartStoplist.txt')
    fp=open(input_file,'r')
    text=fp.read()
    text=text_clean(text)
    """wnl=WordNetLemmatizer()
    text=' '.join([wnl.lemmatize(i.strip()) for i in nltk.word_tokenize(text)])"""
    porter_stemmer=PorterStemmer()
    text=' '.join([porter_stemmer.stem(i.strip()) for i in nltk.word_tokenize(text)])
    keywords=rake.run(text)
   # print keywords

    with open(key_score_file,'wb') as out:
        csv_out=csv.writer(out)
        csv_out.writerow(['KEYWORD','SCORE'])
        for row in keywords:
            if row[1]>0:
                csv_out.writerow(row)


    unibitrigram_list=[]
    unibitrigram_list=generate_unibitrigrams(key_score_file)
    #print unibitrigram_list
    #ngram_freq=[]
    ngram_freq=Counter(unibitrigram_list)
    sorted_ngram_freq=sorted(ngram_freq.items(),key=lambda x:x[1],reverse=True )
    print ngram_freq
    with open('bcom_ngramfr_stem.csv','wb') as nf_csv:
        csv_wr=csv.writer(nf_csv)
        for item in sorted_ngram_freq:
            if ((item[0]!='')):
                csv_wr.writerow(item)
Developer: neethukurian, Project: keyextract, Lines: 35, Source: rake_stem.py

Example 15: StemmerTokenizer

# Required import: from nltk.stem.porter import PorterStemmer [as alias]
# Or: from nltk.stem.porter.PorterStemmer import stem [as alias]
class StemmerTokenizer(object):

    def __init__(self): 
        self.stemmer = PorterStemmer()

    def __call__(self, doc):
        return [self.stemmer.stem(t) for t in word_tokenize(doc)]
Developer: junliangma, Project: graph-based-semi-supervised-learning, Lines: 9, Source: b.py
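
The StemmerTokenizer class above is shaped like the callable that scikit-learn vectorizers accept through their tokenizer parameter. A brief, hypothetical usage sketch follows; it assumes scikit-learn is installed and supplies the word_tokenize and PorterStemmer imports that the original b.py presumably contains but that are not shown in the snippet.

# Hypothetical usage with scikit-learn (illustrative assumption, not part of b.py)
from nltk import word_tokenize                      # used by StemmerTokenizer.__call__; may require nltk.download('punkt')
from nltk.stem.porter import PorterStemmer          # used by StemmerTokenizer.__init__
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(tokenizer=StemmerTokenizer())
X = vectorizer.fit_transform(["cats are running", "a cat ran"])
print(sorted(vectorizer.vocabulary_))   # stemmed features, e.g. 'a', 'are', 'cat', 'ran', 'run'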


Note: The nltk.stem.porter.PorterStemmer.stem examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are excerpted from open-source projects contributed by various developers, and copyright remains with the original authors. Please consult each project's license before distributing or reusing the code; do not reproduce this article without permission.