本文整理汇总了Python中sumy.utils.get_stop_words函数的典型用法代码示例。如果您正苦于以下问题:Python get_stop_words函数的具体用法?Python get_stop_words怎么用?Python get_stop_words使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_stop_words函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main(url, num_sentences=10, language='english'):
    """Fetch the page at *url* and print a *num_sentences*-long summary.

    Args:
        url (str): page to fetch and summarize.
        num_sentences (int): how many sentences to print.
        language (str): language used for tokenizing, stemming and stop words.
    """
    document = HtmlParser.from_url(url, Tokenizer(language)).document
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    for sentence in summarizer(document, num_sentences):
        print(sentence)
示例2: summarize_url
def summarize_url(url, summarizer):
    """Fetch *url*, summarize it with the named algorithm, and return the
    summary sentences as a list of strings.

    Args:
        url (str): page to fetch and summarize.
        summarizer (str): one of 'luhn', 'edmundson', 'lsa', 'lex', 'text',
            'sb'; any other value falls back to the KL summarizer.

    Returns:
        list[str]: the selected summary sentences, in document order.
    """
    # E.G. url = "http://www.cnn.com/2016/06/12/politics/hillary-clinton-bernie-sanders-meeting-tuesday/index.html"
    # Bug fix: the original used Python 2 ``print`` statements, which are
    # SyntaxErrors on Python 3 (other examples in this file already use
    # the print() function).
    print('Summarizing ', url)
    parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    # Dispatch table keeps the algorithm choice in one place; KL is the
    # default for any unrecognized name, matching the original else-branch.
    algorithms = {
        'luhn': LuhnSummarizer,
        'edmundson': ESummarizer,
        'lsa': LsaSummarizer,
        'lex': LexSummarizer,
        'text': TextSummarizer,
        'sb': SumBasicSummarizer,
    }
    summarizer = algorithms.get(summarizer, KLSummarizer)(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    print(summarizer)
    sentences = []
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        sentences.append(str(sentence))
    return sentences
示例3: summarize
def summarize(corpus, length, algorithm):
    """Summarize *corpus* to *length* sentences using *algorithm*.

    Returns the space-joined summary text, a fallback message when the
    algorithm name is not recognized, or the stringified exception if
    anything goes wrong.
    """
    summarizer = None
    summary = "No compatible summarizer was selected, please use one of these : textrank, lexrank, luhn, edmonson*, kl, lsa, sumbasic, random (* doesn\'t work yet)"
    algorithm = algorithm.lower()
    try:
        parser = PlaintextParser.from_string(corpus, Tokenizer(LANGUAGE))
        # One summarizer class per supported algorithm name.
        summarizer_classes = {
            "textrank": TextRankSummarizer,
            "lexrank": LexRankSummarizer,
            "luhn": LuhnSummarizer,
            "edmundson": EdmundsonSummarizer,
            "kl": KLSummarizer,
            "lsa": LsaSummarizer,
            "sumbasic": SumBasicSummarizer,
            "random": RandomSummarizer,
        }
        cls = summarizer_classes.get(algorithm)
        if cls is not None:
            summarizer = cls(Stemmer(LANGUAGE))
        if summarizer:
            summarizer.stop_words = get_stop_words(LANGUAGE)
            summary = " ".join(obj._text for obj in summarizer(parser.document, length))
        return summary
    except Exception as e:
        # Best-effort API: the caller gets the error text, never a raise.
        return str(e)
示例4: summarize
def summarize(text, n_sentences, sep='\n'):
    """Produce an automatic extractive summary of *text*.

    Args:
        text (str or file-like): the text itself, or an open file containing it.
        n_sentences (int): number of sentences to include in the summary.

    Kwargs:
        sep (str): separator used to join the summary sentences.

    Returns:
        (str) n_sentences-long, automatically-produced summary of text.

    Raises:
        TypeError: if *text* is neither a string nor a file-like object.
    """
    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif hasattr(text, 'read'):
        # Bug fix: the original tested ``isinstance(text, file)`` — the
        # Python 2 ``file`` builtin does not exist on Python 3 (NameError).
        # Duck-typing on ``.read`` covers both worlds.
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    # Bug fix: the original joined with a hard-coded '\n', silently ignoring
    # the documented *sep* keyword argument. The default is unchanged.
    return sep.join(str(s) for s in summarizer(parser.document, n_sentences))
示例5: summarize_with_info
def summarize_with_info(self, corpus, length, algorithm):
    """Summarize *corpus* with *algorithm* and return sumy sentence objects.

    Unlike a plain-text summary, the returned objects keep sumy's sentence
    metadata for the caller to inspect.

    Raises:
        NotImplementedError: if *algorithm* is not a recognized name.
    """
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    if algorithm == "textrank":
        summarizer = TextRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lexrank":
        summarizer = LexRankSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "luhn":
        summarizer = LuhnSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "edmundson":
        summarizer = EdmundsonSummarizer(Stemmer(self.LANGUAGE))
        # Edmundson additionally needs bonus/stigma word lists from the parser.
        summarizer.bonus_words = parser.significant_words
        summarizer.stigma_words = parser.stigma_words
    elif algorithm == "kl":
        summarizer = KLSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "lsa":
        summarizer = LsaSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "sumbasic":
        summarizer = SumBasicSummarizer(Stemmer(self.LANGUAGE))
    elif algorithm == "random":
        summarizer = RandomSummarizer(Stemmer(self.LANGUAGE))
    else:
        # Bug fix: the original did ``raise NotImplemented(...)``.
        # ``NotImplemented`` is a sentinel value, not an exception class,
        # so calling it raises a TypeError instead of the intended error.
        raise NotImplementedError("Summary algorithm is not available")
    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    return summarizer(parser.document, length)
示例6: summarize
def summarize(self, corpus, length, algorithm):
    """Summarize *corpus* to *length* sentences and return the joined text.

    Args:
        corpus (str): text to summarize.
        length (int): number of sentences to keep.
        algorithm (str): one of textrank, lexrank, luhn, edmundson, kl, lsa,
            sumbasic, random.

    Raises:
        NotImplementedError: if *algorithm* is not a recognized name.
    """
    parser = PlaintextParser.from_string(corpus, Tokenizer(self.LANGUAGE))
    # One summarizer class per supported algorithm name.
    summarizer_classes = {
        "textrank": TextRankSummarizer,
        "lexrank": LexRankSummarizer,
        "luhn": LuhnSummarizer,
        "edmundson": EdmundsonSummarizer,
        "kl": KLSummarizer,
        "lsa": LsaSummarizer,
        "sumbasic": SumBasicSummarizer,
        "random": RandomSummarizer,
    }
    cls = summarizer_classes.get(algorithm)
    if cls is None:
        # Bug fix: the original did ``raise NotImplemented(...)`` —
        # ``NotImplemented`` is a sentinel, not an exception, so calling it
        # raises TypeError rather than the intended NotImplementedError.
        raise NotImplementedError("Summary algorithm is not available")
    summarizer = cls(Stemmer(self.LANGUAGE))
    summarizer.stop_words = get_stop_words(self.LANGUAGE)
    summary = " ".join(obj._text for obj in summarizer(parser.document, length))
    return summary
示例7: summarizeFile
def summarizeFile(inputFile):
    """Summarize *inputFile*'s contents, appending a 3-sentence summary of
    the first URL found in the text (if any).

    Returns:
        str or None: the combined summary, or None when sentence extraction
        fails (AssertionError) on the file's own content.
    """
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    url = findURLS(inputFile)
    urlContent = ''
    # Bug fix: use ``is not None`` rather than ``!= None`` (PEP 8), and
    # initialize urlContent unconditionally so it is always defined.
    if url is not None:
        # Strip a trailing period that URL detection often glues on.
        if url[-1] == '.':
            url = url[0:-1]
        #print (url)
        #urlContent = 'Summary from URL ['+url+']: \n'
        try:
            parser = HtmlParser.from_url(url, Tokenizer("english"))
            for sentence in summarizer(parser.document, 3):
                urlContent = urlContent + str(sentence) + '\n'
        except Exception:
            # Bug fix: the original bare ``except:`` also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to Exception while
            # keeping the deliberate best-effort fallback.
            #print (sys.exc_info()[0])
            urlContent = ''
    content = inputFile.read()
    parser = PlaintextParser.from_string(content, Tokenizer(LANGUAGE))
    #summary = 'Event Summary: \n'
    summary = ''
    try:
        for sentence in summarizer(parser.document, SENTENCES_COUNT_1):
            summary = summary + str(sentence) + '\n'
    except AssertionError:
        return None
    if url is not None:
        return summary + urlContent
    return summary
示例8: summarize
def summarize(string, summary_length=1, language="english"):
    """Return a *summary_length*-sentence summary of *string*, joined by '. '."""
    # Normalize all-caps input so the tokenizer/stemmer behave sensibly.
    if string.isupper():
        string = string.lower()
    parser = PlaintextParser.from_string(string, Tokenizer(language))
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    chosen = summarizer(parser.document, summary_length)
    return ". ".join(str(sentence) for sentence in chosen)
示例9: luhn
def luhn(self, text_parser):
    """Run Luhn summarization over *text_parser*'s document and return the
    top sentences, limited by settings.SUMMARIZER_TOP_X_SENTENCES."""
    assert isinstance(text_parser, plaintext.PlaintextParser)
    summarizer = Luhn()
    # Stop words come from sumy's bundled lists, in the configured language.
    summarizer.stop_words = get_stop_words(settings.SUMMARIZER_LANGUAGE)
    return summarizer(text_parser.document, settings.SUMMARIZER_TOP_X_SENTENCES)
示例10: summarize
def summarize(url):
    """Fetch *url* and return its summary as one space-joined string.

    Uses the module-level ``lang`` and ``sent`` (sentence count) settings.
    """
    parser = HtmlParser.from_url(url, Tokenizer(lang))
    summarizer = Summarizer(Stemmer(lang))
    summarizer.stop_words = get_stop_words(lang)
    picked = [sentence._text for sentence in summarizer(parser.document, sent)]
    return ' '.join(picked)
示例11: summarize
def summarize(text):
    """Concatenate the top SENTENCES_COUNT sentences of *text*, with no
    separator between sentences."""
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    picked = summarizer(parser.document, SENTENCES_COUNT)
    return ''.join(str(sentence) for sentence in picked)
示例12: lsa
def lsa(comment, parser, num):
    """Return the *num*-sentence LSA summary of *parser*'s document as one
    string with no separators.

    Note: *comment* is unused here; ``stemmer`` and ``LANGUAGE`` are read
    from module scope.
    """
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return ''.join(str(sentence) for sentence in summarizer(parser.document, num))
示例13: summarizeText
def summarizeText(self, body, numSentences=10):
    """Summarizes body of text to numSentences.

    Args:
        body (str): the text to summarize.
        numSentences (int): number of sentences in the result.

    Returns:
        str: the summary sentences joined with single spaces.
    """
    parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    stemmer = Stemmer(self.LANG)
    summarizer = SumySummarizer(stemmer)
    summarizer.stop_words = get_stop_words(self.LANG)
    # Bug fix: the original called ``str(sentence).decode('utf-8')`` — a
    # Python 2 idiom; on Python 3, str has no .decode() and this raised
    # AttributeError. str(sentence) is already text.
    summary = ' '.join(str(sentence) for sentence in summarizer(parser.document, numSentences))
    return summary
示例14: summarize
def summarize(content):
    """Summarize ``content.body`` and persist the result as a Summary record.

    Side effect only: creates and saves a Summary row; returns None.
    """
    parser = PlaintextParser.from_string(content.body, Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    lines = [str(sentence) for sentence in summarizer(parser.document, COUNT)]
    text = '\n'.join(lines)
    record = Summary(content=content, summary=text)
    record.save()
示例15: retreive_sumy
def retreive_sumy(url):
    """Fetch *url* and return sumy's summary sentences (sentence objects,
    not strings). Public name's spelling is kept for caller compatibility.
    """
    # e.g. "http://en.wikipedia.org/wiki/Automatic_summarization"
    document = HtmlParser.from_url(url, Tokenizer(LANGUAGE)).document
    # or for plain text files
    # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return summarizer(document, SENTENCES_COUNT)