本文整理汇总了Python中sumy.summarizers.lsa.LsaSummarizer类的典型用法代码示例。如果您正苦于以下问题:Python LsaSummarizer类的具体用法?Python LsaSummarizer怎么用?Python LsaSummarizer使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了LsaSummarizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: summarizeFile
def summarizeFile(inputFile):
    """Summarize the text in *inputFile*, plus the page behind any URL found in it.

    Args:
        inputFile: an open file-like object whose contents will be read.

    Returns:
        The combined summary string (file summary followed by the URL page
        summary when a URL was found), or None when sumy's LSA summarizer
        raises AssertionError on the document.
    """
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")

    url = findURLS(inputFile)
    urlContent = ''
    if url is not None:
        # findURLS may include a trailing sentence-ending period; strip it.
        if url[-1] == '.':
            url = url[:-1]
        try:
            parser = HtmlParser.from_url(url, Tokenizer("english"))
            for sentence in summarizer(parser.document, 3):
                urlContent += str(sentence) + '\n'
        except Exception:
            # Best effort: network/parse failures just drop the URL summary.
            # (Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            urlContent = ''

    content = inputFile.read()
    parser = PlaintextParser.from_string(content, Tokenizer(LANGUAGE))
    summary = ''
    try:
        for sentence in summarizer(parser.document, SENTENCES_COUNT_1):
            summary += str(sentence) + '\n'
    except AssertionError:
        return None

    if url is not None:
        return summary + urlContent
    return summary
示例2: test_single_sentence
def test_single_sentence(self):
    """A one-sentence document yields that sentence even when more are requested."""
    document = build_document(("I am the sentence you like",))
    summarizer = LsaSummarizer()
    # Fixed: the original assigned `summarizer.stopwords`, a plain attribute
    # sumy never reads; the real property (used everywhere else in this file)
    # is `stop_words`. The assertions still hold: the only sentence is returned.
    summarizer.stop_words = ("I", "am", "the",)

    sentences = summarizer(document, 10)

    self.assertEqual(len(sentences), 1)
    self.assertEqual(to_unicode(sentences[0]), "I am the sentence you like")
示例3: test_single_sentence
def test_single_sentence():
    """A one-sentence document yields that sentence even when more are requested."""
    document = build_document(("I am the sentence you like",))
    summarizer = LsaSummarizer()
    # Fixed: the original assigned `summarizer.stopwords`, a plain attribute
    # sumy never reads; the real property (used everywhere else in this file)
    # is `stop_words`. The assertions still hold: the only sentence is returned.
    summarizer.stop_words = ("I", "am", "the",)

    sentences = summarizer(document, 10)

    assert len(sentences) == 1
    assert to_unicode(sentences[0]) == "I am the sentence you like"
示例4: lsa
def lsa(comment, parser, num):
    """Return *num* LSA summary sentences of *parser*'s document, concatenated.

    Sentences are joined with no separator, matching the original behavior.
    (`comment` is accepted but unused; kept for interface compatibility.)
    """
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return ''.join(str(sentence) for sentence in summarizer(parser.document, num))
示例5: summarize
def summarize(filename, num_sentences):
    """Summarize the text file at *filename* into *num_sentences* sentences.

    Non-ASCII characters and quote marks are stripped from each sentence;
    sentences are joined with single spaces (a trailing space is kept, as in
    the original implementation).
    """
    with open(filename, "r") as myfile:
        data = myfile.read()
    parser = PlaintextParser.from_string(data, Tokenizer('english'))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    summary = ""
    for sentence in summarizer(parser.document, num_sentences):
        # Fixed for Python 3: the original called .encode('ascii','ignore')
        # and then bytes.replace() with str arguments, which raises TypeError.
        # Round-trip through ASCII to drop non-ASCII chars, staying str-typed.
        text = str(sentence).encode('ascii', 'ignore').decode('ascii')
        summary += text.replace('"', '').replace("'", '').strip() + " "
    return summary
示例6: summary
def summary(self, int1, int2):
    """Summarize the corpus slice between *int1* and *int2* into 20 sentences.

    The result is cached on ``self.summary_text`` and also returned.
    """
    parser = PlaintextParser(self.corpus[int1:int2], Tokenizer("english"))
    summarizer = LsaSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("english")
    # NOTE(review): this reaches into sumy's private Sentence._text attribute;
    # str(sentence) would be the public equivalent.
    texts = [sentence._text for sentence in summarizer(parser.document, 20)]
    self.summary_text = " ".join(texts)
    return self.summary_text
示例7: test_issue_5_sigma_can_multiply_matrix_v
def test_issue_5_sigma_can_multiply_matrix_v(self):
    """Source: https://github.com/miso-belica/sumy/issues/5"""
    article = load_resource("articles/sigma_can_multiply_matrix_v.txt")
    parser = PlaintextParser.from_string(article, Tokenizer("english"))

    summarizer = LsaSummarizer(english_stemmer)
    summarizer.stop_words = get_stop_words("english")

    # Regression check: summarization must complete and return 20 sentences.
    self.assertEqual(len(summarizer(parser.document, 20)), 20)
示例8: test_article_example
def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    article = load_resource("articles/prevko_cz_1.txt")
    parser = PlaintextParser.from_string(article, Tokenizer("czech"))

    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    # The article is long enough to supply all 20 requested sentences.
    assert len(summarizer(parser.document, 20)) == 20
示例9: test_real_example
def test_real_example(self):
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    snippet = load_resource("snippets/prevko.txt")
    parser = PlaintextParser.from_string(snippet, Tokenizer("czech"))

    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    # Exactly the requested 2 sentences come back for this snippet.
    self.assertEqual(len(summarizer(parser.document, 2)), 2)
示例10: main
def main(url, num_sentences=10, language='english'):
    """Fetch *url*, summarize it, and print one summary sentence per line."""
    parser = HtmlParser.from_url(url, Tokenizer(language))
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    sentences = summarizer(parser.document, num_sentences)
    for sentence in sentences:
        print(sentence)
示例11: summarize
def summarize(text, n_sentences, sep='\n'):
    '''
    Args:
        text (str or file): text itself or file in memory of text
        n_sentences (int): number of sentences to include in summary

    Kwargs:
        sep (str): separator to join summary sentences

    Returns:
        (str) n_sentences-long, automatically-produced summary of text

    Raises:
        TypeError: when *text* is neither a str nor a file-like object.
    '''
    import io  # local import so this block is self-contained

    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif isinstance(text, io.IOBase):
        # Fixed for Python 3: the builtin `file` type no longer exists;
        # io.IOBase is the base of all open-file objects.
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')

    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    # Fixed: the original hard-coded '\n' here, silently ignoring the
    # documented `sep` keyword argument (default is still '\n').
    return sep.join(str(s) for s in summarizer(parser.document, n_sentences))
示例12: summarize
def summarize(string, summary_length=1, language="english"):
    """Summarize *string* into *summary_length* sentences joined by ". "."""
    # Fully upper-cased input trips the sentence tokenizer, so lowercase it.
    if string.isupper():
        string = string.lower()
    parser = PlaintextParser.from_string(string, Tokenizer(language))
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    sentences = summarizer(parser.document, summary_length)
    return ". ".join(str(sentence) for sentence in sentences)
示例13: summarize
def summarize(text):
    """Return SENTENCES_COUNT summary sentences of *text*, concatenated.

    Sentences are joined with no separator, matching the original behavior.
    """
    parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    pieces = [str(s) for s in summarizer(parser.document, SENTENCES_COUNT)]
    return "".join(pieces)
示例14: summarizeText
def summarizeText(self, body, numSentences = 10):
    """Summarizes body of text to numSentences.

    Args:
        body: the text to summarize.
        numSentences: maximum number of sentences in the summary.

    Returns:
        A single string of summary sentences joined by spaces.
    """
    parser = PlaintextParser.from_string(body, Tokenizer(self.LANG))
    stemmer = Stemmer(self.LANG)
    summarizer = SumySummarizer(stemmer)
    summarizer.stop_words = get_stop_words(self.LANG)
    # Fixed for Python 3: the original called str(sentence).decode('utf-8'),
    # which raises AttributeError because str has no .decode(); str(sentence)
    # is already text.
    summary = ' '.join(str(sentence) for sentence in summarizer(parser.document, numSentences))
    return summary
示例15: summarize
def summarize(content):
    """Build a COUNT-sentence summary of *content*.body and persist it.

    Side effect: creates and saves a Summary row linked to *content*.
    """
    parser = PlaintextParser.from_string(content.body, Tokenizer(LANGUAGE))
    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    lines = [str(sentence) for sentence in summarizer(parser.document, COUNT)]
    summary = Summary(content=content, summary='\n'.join(lines))
    summary.save()