本文整理汇总了Python中sumy.summarizers.lsa.LsaSummarizer.stop_words方法的典型用法代码示例。如果您正苦于以下问题:Python LsaSummarizer.stop_words方法的具体用法?Python LsaSummarizer.stop_words怎么用?Python LsaSummarizer.stop_words使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sumy.summarizers.lsa.LsaSummarizer的用法示例。
在下文中一共展示了LsaSummarizer.stop_words方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(text, n_sentences, sep='\n'):
    """Produce an LSA summary of *text*.

    Args:
        text (str or file-like): the text itself, or an open file object.
        n_sentences (int): number of sentences to include in the summary.
    Kwargs:
        sep (str): separator used to join the summary sentences.
    Returns:
        str: n_sentences-long, automatically-produced summary of text.
    Raises:
        TypeError: if text is neither a str nor a file-like object.
    """
    import io  # local import keeps the fix self-contained

    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif isinstance(text, io.IOBase):
        # Bug fix: Python 3 has no builtin `file` type; `isinstance(text, file)`
        # raised NameError. io.IOBase matches objects returned by open().
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    # Bug fix: honor the documented `sep` keyword instead of hard-coding '\n'.
    return sep.join(str(s) for s in summarizer(parser.document, n_sentences))
示例2: main
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def main(url, num_sentences=10, language='english'):
    """Fetch *url*, summarize it with LSA, and print one sentence per line."""
    html_parser = HtmlParser.from_url(url, Tokenizer(language))
    lsa = Summarizer(Stemmer(language))
    lsa.stop_words = get_stop_words(language)
    for line in lsa(html_parser.document, num_sentences):
        print(line)
示例3: summarizeFile
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarizeFile(inputFile):
    """Summarize the text in *inputFile* with LSA.

    If the text contains a URL, a 3-sentence summary of that page is
    appended to the result (best-effort: fetch failures are ignored).

    Args:
        inputFile (file-like): open handle whose content is summarized.
    Returns:
        str or None: the summary text, or None if summarization of the
        file body fails with an AssertionError (e.g. too little text).
    """
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    url = findURLS(inputFile)
    urlContent = ''
    if url is not None:  # fix: identity comparison with None, not !=
        # Strip a sentence-final period the URL matcher may have captured.
        if url[-1] == '.':
            url = url[:-1]
        try:
            parser = HtmlParser.from_url(url, Tokenizer("english"))
            # Collect pieces and join once instead of repeated concatenation.
            urlContent = ''.join(
                str(sentence) + '\n' for sentence in summarizer(parser.document, 3)
            )
        except Exception:
            # Fix: narrow the bare `except:` so KeyboardInterrupt/SystemExit
            # propagate; unreachable or unparsable URLs contribute nothing.
            urlContent = ''
    content = inputFile.read()
    parser = PlaintextParser.from_string(content, Tokenizer(LANGUAGE))
    try:
        summary = ''.join(
            str(sentence) + '\n'
            for sentence in summarizer(parser.document, SENTENCES_COUNT_1)
        )
    except AssertionError:
        # sumy asserts on degenerate input; signal failure to the caller.
        return None
    if url is not None:
        return summary + urlContent
    return summary
示例4: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(string, summary_length = 1, language = "english"):
    """Return *summary_length* LSA summary sentences of *string*, joined by '. '."""
    # An all-uppercase input confuses sentence tokenization, so lowercase it.
    normalized = string.lower() if string.isupper() else string
    document = PlaintextParser.from_string(normalized, Tokenizer(language)).document
    lsa = Summarizer(Stemmer(language))
    lsa.stop_words = get_stop_words(language)
    picked = [str(s) for s in lsa(document, summary_length)]
    return ". ".join(picked)
示例5: lsa
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def lsa(comment,parser,num):
    """Return the top *num* LSA sentences of *parser*'s document as one string.

    NOTE(review): relies on module-level `stemmer` and `LANGUAGE`; the
    `comment` argument is unused but kept for caller compatibility.
    """
    engine = LsaSummarizer(stemmer)
    engine.stop_words = get_stop_words(LANGUAGE)
    return ''.join(str(sentence) for sentence in engine(parser.document, num))
示例6: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(text):
    """Concatenate the top SENTENCES_COUNT LSA sentences of *text* into one string."""
    document = PlaintextParser.from_string(text, Tokenizer(LANGUAGE)).document
    engine = Summarizer(Stemmer(LANGUAGE))
    engine.stop_words = get_stop_words(LANGUAGE)
    return "".join(str(sentence) for sentence in engine(document, SENTENCES_COUNT))
示例7: retreive_sumy
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def retreive_sumy(url):
    """Fetch *url* and return its top SENTENCES_COUNT sentences (sumy objects).

    Example input: "http://en.wikipedia.org/wiki/Automatic_summarization".
    For plain text files, PlaintextParser.from_file could be used instead.
    """
    document = HtmlParser.from_url(url, Tokenizer(LANGUAGE)).document
    engine = Summarizer(Stemmer(LANGUAGE))
    engine.stop_words = get_stop_words(LANGUAGE)
    return engine(document, SENTENCES_COUNT)
示例8: summarizeText
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarizeText(self, body, numSentences = 10):
    """Summarize *body* down to at most *numSentences* sentences.

    Args:
        body (str): the text to summarize.
        numSentences (int): maximum number of sentences in the summary.
    Returns:
        str: the selected sentences joined with single spaces.
    """
    parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    stemmer = Stemmer(self.LANG)
    summarizer = SumySummarizer(stemmer)
    summarizer.stop_words = get_stop_words(self.LANG)
    # Bug fix: str(sentence) is already text in Python 3; calling
    # .decode('utf-8') on a str raises AttributeError.
    summary = ' '.join(str(sentence) for sentence in summarizer(parser.document, numSentences))
    return summary
示例9: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(content):
    """Create and persist a Summary record for *content* using LSA.

    Summarizes content.body into COUNT sentences (newline-separated) and
    saves a Summary row linking back to the content object.
    """
    document = PlaintextParser.from_string(content.body, Tokenizer(LANGUAGE)).document
    engine = Summarizer(Stemmer(LANGUAGE))
    engine.stop_words = get_stop_words(LANGUAGE)
    text = '\n'.join(str(sentence) for sentence in engine(document, COUNT))
    Summary(content=content, summary=text).save()
示例10: summary
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summary(self, int1, int2):
    """Summarize the corpus slice self.corpus[int1:int2] into 20 sentences.

    Caches the result on self.summary_text and returns it.
    """
    sliced = self.corpus[int1:int2]
    parser = PlaintextParser(sliced, Tokenizer("english"))
    engine = LsaSummarizer(stem_word)
    engine.stop_words = get_stop_words("english")
    # NOTE(review): reads the private `_text` attribute of sumy sentences.
    self.summary_text = " ".join(s._text for s in engine(parser.document, 20))
    return self.summary_text
示例11: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(filename, num_sentences):
    """Summarize the file at *filename* into *num_sentences* sentences.

    Quotes are stripped and non-ASCII characters dropped from each sentence;
    sentences are concatenated with trailing spaces.

    Args:
        filename (str): path to a plain-text file.
        num_sentences (int): number of sentences to extract.
    Returns:
        str: the cleaned, space-separated summary.
    """
    with open(filename, "r") as myfile:
        data = myfile.read()
    parser = PlaintextParser.from_string(data, Tokenizer('english'))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    pieces = []
    for sentence in summarizer(parser.document, num_sentences):
        # Bug fix: `sentence.__unicode__()` does not exist on Python 3, and
        # `.encode(...).replace('"', ...)` called bytes.replace with str
        # arguments (TypeError). Work in str: encode/decode round-trip
        # drops non-ASCII, then strip quotes as text.
        text = str(sentence).encode('ascii', 'ignore').decode('ascii')
        pieces.append(text.replace('"', '').replace("'", '').strip() + " ")
    # Join once instead of quadratic += concatenation.
    return "".join(pieces)
示例12: test_article_example
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    article = load_resource("articles/prevko_cz_1.txt")
    parser = PlaintextParser.from_string(article, Tokenizer("czech"))
    engine = LsaSummarizer(Stemmer("czech"))
    engine.stop_words = get_stop_words("czech")
    # Requesting 20 sentences must yield exactly 20.
    assert len(engine(parser.document, 20)) == 20
示例13: summarize
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def summarize(text):
    """Return the top SENTENCES_COUNT LSA sentences of *text*.

    Each sentence is followed by a single space (including the last one),
    matching the original concatenation behavior.
    """
    document = PlaintextParser.from_string(text, Tokenizer(LANGUAGE)).document
    engine = Summarizer(Stemmer(LANGUAGE))
    engine.stop_words = get_stop_words(LANGUAGE)
    return "".join(str(sentence) + " " for sentence in engine(document, SENTENCES_COUNT))
示例14: test_real_example
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def test_real_example(self):
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    snippet = load_resource("snippets/prevko.txt")
    parser = PlaintextParser.from_string(snippet, Tokenizer("czech"))
    engine = LsaSummarizer(Stemmer("czech"))
    engine.stop_words = get_stop_words("czech")
    # Requesting 2 sentences must yield exactly 2.
    self.assertEqual(len(engine(parser.document, 2)), 2)
示例15: lsa
# 需要导入模块: from sumy.summarizers.lsa import LsaSummarizer [as 别名]
# 或者: from sumy.summarizers.lsa.LsaSummarizer import stop_words [as 别名]
def lsa(self,text_parser):
    """Return the top SUMMARIZER_TOP_X_SENTENCES sentences of the parsed text via LSA."""
    assert isinstance(text_parser,plaintext.PlaintextParser)
    engine = LSA()
    # Stop words must be assigned explicitly; a bare LSA() has none set.
    engine.stop_words = get_stop_words(settings.SUMMARIZER_LANGUAGE)
    return engine(text_parser.document, settings.SUMMARIZER_TOP_X_SENTENCES)