当前位置: 首页>>代码示例>>Python>>正文


Python PlaintextParser.from_file方法代码示例

本文整理汇总了Python中sumy.parsers.plaintext.PlaintextParser.from_file方法的典型用法代码示例。如果您正苦于以下问题:Python PlaintextParser.from_file方法的具体用法?Python PlaintextParser.from_file怎么用?Python PlaintextParser.from_file使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sumy.parsers.plaintext.PlaintextParser的用法示例。


在下文中一共展示了PlaintextParser.from_file方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _firstK_score

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def _firstK_score(storyName, highlightName):
    """Score the first-K-sentences baseline against the reference highlights.

    The "summary" is simply the first SENTENCES_COUNT sentences of the story
    file; it is compared to all sentences of the highlight file via
    evaluate().

    Args:
        storyName: path of the plain-text story file.
        highlightName: path of the plain-text reference-highlights file.

    Returns:
        Whatever evaluate() returns for (generated, reference) sentences.

    Raises:
        Re-raises any exception from evaluate() after printing the story name.
    """
    parser = PlaintextParser.from_file(storyName, Tokenizer(LANGUAGE))

    # Baseline summary: just the leading SENTENCES_COUNT sentences.
    geneSen = parser.document.sentences[:SENTENCES_COUNT]
    refSen = PlaintextParser.from_file(highlightName, Tokenizer(LANGUAGE)).document.sentences

    try:
        return evaluate(geneSen, refSen)
    except Exception as e:
        # Report which story failed, then re-raise with the original
        # traceback (bare `raise` preserves it; `raise e` did not on py2).
        print(storyName)
        print(e)
        raise
开发者ID:KevinWangTHU,项目名称:data_stat,代码行数:18,代码来源:summary.py

示例2: summarize

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def summarize(text, n_sentences, sep='\n'):
    '''
    Args:
        text (str or file): text itself or file in memory of text
        n_sentences (int): number of sentences to include in summary

    Kwargs:
        sep (str): separator to join summary sentences

    Returns:
        (str) n_sentences-long, automatically-produced summary of text

    Raises:
        TypeError: if text is neither a str nor a file-like object.
    '''

    if isinstance(text, str):
        parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
    elif hasattr(text, 'read'):
        # File-like object. The original tested `isinstance(text, file)`,
        # but the `file` builtin only exists in Python 2.
        parser = PlaintextParser.from_file(text, Tokenizer(LANGUAGE))
    else:
        raise TypeError('text must be either str or file')

    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # Join with the caller-supplied separator; the original ignored `sep`
    # and always joined with '\n', contradicting its own docstring.
    return sep.join(str(s) for s in summarizer(parser.document, n_sentences))
开发者ID:mtpain,项目名称:iatv,代码行数:28,代码来源:iatv.py

示例3: kl_rank_sum

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def kl_rank_sum(path, K):
    """Summarize the plain-text file at *path* into K sentences.

    NOTE(review): despite the "kl" name, this uses LexRankSummarizer, not a
    KL-sum algorithm — confirm which summarizer was intended.

    Args:
        path: path of the plain-text file to summarize.
        K: number of sentences in the returned summary.

    Returns:
        The summarizer's sentence sequence (K sentences).
    """
    # The original re-bound `filename = path` and `K = K` — both no-ops.
    parser = PlaintextParser.from_file(path, Tokenizer("english"))
    summarizer = LexRankSummarizer()
    return summarizer(parser.document, K)
开发者ID:danskey,项目名称:matters_of_concern,代码行数:9,代码来源:sumText.py

示例4: lex_rank_sum

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def lex_rank_sum(path, L):
    """Summarize the plain-text file at *path* with LexRank.

    Args:
        path: path of the plain-text file to summarize.
        L: number of sentences in the returned summary.

    Returns:
        list[str]: the L summary sentences as strings (suitable for writing
        to a summary output file).
    """
    # The original re-bound `filename = path` and `L = L` — both no-ops —
    # and filled the list with a manual append loop.
    parser = PlaintextParser.from_file(path, Tokenizer("english"))
    summarizer = LexRankSummarizer()
    summary = summarizer(parser.document, L)
    return [str(sentence) for sentence in summary]
开发者ID:danskey,项目名称:matters_of_concern,代码行数:13,代码来源:sumText.py

示例5: _summ_score

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def _summ_score(storyName, highlightName):
    """Score a Summarizer-generated summary against the reference highlights.

    Args:
        storyName: path of the plain-text story file.
        highlightName: path of the plain-text reference-highlights file.

    Returns:
        Whatever evaluate() returns for (generated, reference) sentences.

    Raises:
        Re-raises any exception from evaluate() after printing the story name.
    """
    parser = PlaintextParser.from_file(storyName, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    geneSen = summarizer(parser.document, SENTENCES_COUNT)
    refSen = PlaintextParser.from_file(highlightName, Tokenizer(LANGUAGE)).document.sentences

    try:
        return evaluate(geneSen, refSen)
    except Exception as e:
        # Report which story failed, then re-raise with the original
        # traceback (bare `raise` preserves it; `raise e` did not on py2).
        print(storyName)
        print(e)
        raise
开发者ID:KevinWangTHU,项目名称:data_stat,代码行数:22,代码来源:summary.py

示例6: textrankReferenceSummary

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def textrankReferenceSummary(path):
    """Summarize the plain-text file at *path* with TextRank.

    Args:
        path: path of the plain-text file to summarize.

    Returns:
        list[str]: SENTENCES_COUNT sentence texts produced by TextRank.
    """
    parser = PlaintextParser.from_file(path, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = TextRankSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # NOTE(review): `_text` is a private sumy attribute; kept as-is to avoid
    # a behavior change, but str(sentence) would be the public route.
    return [sentence._text
            for sentence in summarizer(parser.document, SENTENCES_COUNT)]
开发者ID:ab93,项目名称:Text-Summarization,代码行数:15,代码来源:api.py

示例7: _score

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def _score(storyName, highlightName):
    """Debug helper: print per-sentence ROUGE-1 scores.

    For every generated sentence of *storyName*, prints the list of
    rouge_n(..., 1) scores against each reference sentence of
    *highlightName*, then pauses so the output can be inspected.
    """
    geneSen = PlaintextParser.from_file(storyName, Tokenizer(LANGUAGE)).document.sentences
    refSen = PlaintextParser.from_file(highlightName, Tokenizer(LANGUAGE)).document.sentences
    print("==============")
    for sen in refSen:
        print(sen)
    for gs in geneSen:
        print(gs)
        # One ROUGE-1 score per reference sentence for this generated one.
        r1 = [rouge_n([gs], [rs], 1) for rs in refSen]
        print(r1)

    # Pause until the user presses Enter (py3 input() == py2 raw_input()).
    input()
开发者ID:KevinWangTHU,项目名称:data_stat,代码行数:25,代码来源:rouge.py

示例8: summarize_file

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def summarize_file(file_name):
    """Summarize a plain-text file.

    For HTML input, HtmlParser.from_url(url, Tokenizer(LANGUAGE)) would be
    used instead; this helper handles plain-text files only.

    Args:
        file_name: path of the plain-text file to summarize.

    Returns:
        list[str]: SENTENCES_COUNT summary sentences as strings.
    """
    parser = PlaintextParser.from_file(file_name, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)

    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    sentences = summarizer(parser.document, SENTENCES_COUNT)
    return [str(sentence) for sentence in sentences]
开发者ID:tungnt55,项目名称:test_repo,代码行数:17,代码来源:post_process.py

示例9: get_smry

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
    def get_smry(self, input):
        """Summarize the plain-text file *input* into 10 sentences.

        NOTE(review): the parameter shadows the builtin input(); kept so
        keyword callers are not broken.

        Args:
            input: path of the plain-text file to summarize.

        Returns:
            dict[str, str]: sentences keyed by their string index "0".."9".
        """
        smry_list = {}
        LANGUAGE = "english"
        SENTENCES_COUNT = 10
        parser = PlaintextParser.from_file(input, Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)

        summarizer = Summarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)

        # enumerate replaces the manually-incremented counter.
        for i, sentence in enumerate(summarizer(parser.document, SENTENCES_COUNT)):
            print(sentence)
            smry_list[str(i)] = str(sentence)
        return smry_list
开发者ID:gatesyp,项目名称:AngelHacks,代码行数:18,代码来源:generate.py

示例10: extract_summary_keywords

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def extract_summary_keywords(trend, urls, titles):
    """Fetch and summarize the articles behind *urls* for a given trend.

    Downloads all article text, extracts keywords, produces a 12-sentence
    summary, replaces synonyms, and grammar-corrects the result.

    Args:
        trend: trend name; used for the temporary file name.
        urls: article URLs passed to extract_text().
        titles: article titles passed to extract_keywords_from_all_text().

    Returns:
        tuple: (grammar-corrected summary string, extracted keywords).
    """
    total_articles_content = extract_text(urls)
    keywords = extract_keywords_from_all_text(total_articles_content, titles)
    # PlaintextParser.from_file wants a path, so the combined text is written
    # to a temp file beside this module and removed right after parsing.
    # os.path.join replaces the original hard-coded '\\' separator, which
    # only worked on Windows.
    current_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), trend + '.txt')
    with open(current_path, 'w') as the_file:
        the_file.write(total_articles_content)
    parser = PlaintextParser.from_file(current_path, Tokenizer(LANGUAGE))
    os.remove(current_path)
    # join() replaces the original quadratic string concatenation loop;
    # the produced string is identical (leading space before each sentence).
    sentences = ''.join(' ' + str(sentence)
                        for sentence in summarizer(parser.document, 12))
    # NOTE(review): the synonym-replaced text is computed but never used,
    # exactly as in the original — confirm whether `sentences` should be
    # replaced by it below.
    replaced_syn = replacesynonym(sentences)
    matches = tool.check(sentences)
    correct_summary = language_check.correct(sentences, matches)
    return correct_summary, keywords
开发者ID:abhigenie92,项目名称:content_gen,代码行数:18,代码来源:main.py

示例11: createSummary

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
    def createSummary(self, input_file):
        """Summarize *input_file* and cache the results on the instance.

        Side effects:
            self.__summary: list[str] of the top-5 ranked sentences.
            self.__top: str, the single highest-ranked sentence.
        """
        parser = PlaintextParser.from_file(
            input_file, Tokenizer(self.__language))
        self.__sumySummarizer.stop_words = get_stop_words(self.__language)

        # The original also collected every document sentence into an
        # `all_sentences` list that was never read — dead code, removed.

        N = 5
        self.__summary = [
            str(sentence)
            for sentence in self.__sumySummarizer(parser.document, N)
        ]

        for sentence in self.__sumySummarizer(parser.document, 1):
            self.__top = str(sentence)
开发者ID:andrewkoo0815,项目名称:SupremeBrief,代码行数:20,代码来源:sumy_learn_v2.py

示例12: use_sumy

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
 def use_sumy(input, SENTENCES_COUNT, method, parser_option):
     """Code to run sumy.

     # Supported summarization methods:
     #    Luhn - heurestic method, reference
     #    Edmundson heurestic method with previous statistic research, reference
     #    Latent Semantic Analysis, LSA - one of the algorithm from http://scholar.google.com/citations?user=0fTuW_YAAAAJ&hl=en I think the author is using more advanced algorithms now. Steinberger, J. a Ježek, K. Using latent semantic an and summary evaluation. In In Proceedings ISIM '04. 2004. S. 93-100.
     #    LexRank - Unsupervised approach inspired by algorithms PageRank and HITS, reference
     #    TextRank - some sort of combination of a few resources that I found on the internet. I really don't remember the sources. Probably Wikipedia and some papers in 1st page of Google :)

     Args:
         input: a file path (parser_option 'file') or raw text ('string').
         SENTENCES_COUNT: number of sentences in the returned summary.
         method: unused here; presumably selects the algorithm upstream.
         parser_option: 'file' or 'string'.

     Returns:
         list of the summarizer's Sentence objects.

     Raises:
         ValueError: if parser_option is neither 'file' nor 'string'
         (the original fell through to a NameError on `parser` instead).
     """
     LANGUAGE = "english"
     if parser_option == 'file':
         parser = PlaintextParser.from_file(input, Tokenizer(LANGUAGE))
     elif parser_option == 'string':
         parser = PlaintextParser.from_string(input, Tokenizer(LANGUAGE))
     else:
         raise ValueError("parser_option must be 'file' or 'string'")
     stemmer = Stemmer(LANGUAGE)

     summarizer = Summarizer(stemmer)
     summarizer.stop_words = get_stop_words(LANGUAGE)
     return list(summarizer(parser.document, SENTENCES_COUNT))
开发者ID:ViennaMike,项目名称:Directory-Summarizer,代码行数:24,代码来源:file_digester.py

示例13: create_summary

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
def create_summary(algorithm, input_file, output_file="sumy_summary.txt"):
    """Summarize *input_file* and write the result to *output_file*.

    Writes the top-5 ranked sentences followed by the position (0-4) of the
    single best sentence, each terminated by the 'XXXXXX' delimiter expected
    by the downstream reader.

    Args:
        algorithm: identifier passed to create_summarizer().
        input_file: path of the plain-text file to summarize.
        output_file: path of the delimiter-separated output file.
    """
    LANGUAGE = "english"
    N = 5  # number of top-ranked sentences to keep

    stemmer = Stemmer(LANGUAGE)
    parser = PlaintextParser.from_file(input_file, Tokenizer(LANGUAGE))
    summarizer = create_summarizer(algorithm, stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    # The original also split the document into an `all_sentences` list that
    # was never read — dead code, removed.

    top_ranked_sentences = [
        str(sentence) for sentence in summarizer(parser.document, N)
    ]

    # The single best sentence, then its index within the top-N list.
    for sentence in summarizer(parser.document, 1):
        top_sentence = str(sentence)
    position = top_ranked_sentences.index(top_sentence)

    # `with` guarantees the file is closed (the original leaked the handle
    # on any write error).
    with open(output_file, "w") as record:
        for sentence_text in top_ranked_sentences:
            record.write(sentence_text + 'XXXXXX')
        record.write(str(position) + 'XXXXXX')
开发者ID:andrewkoo0815,项目名称:SupremeBrief,代码行数:39,代码来源:sumy_learn.py

示例14: summarizer

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
    sys.setdefaultencoding('utf8')
    """
    nltk.data.path.append('/home/kariminf/Data/NLTK/')



    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
    """

    file = open(SIZE_FILE, 'r')
    while 1:
        line = file.readline()
        if line == '':
			break;
        parts = line.split(",")
        sizes[parts[0]] = int(parts[1])
    file.close()

    nltk.data.path.append('/home/kariminf/Data/NLTK/')
    for eval in sizes:
    	txt_path = "src/body/text/en/" + eval
        parser = PlaintextParser.from_file(txt_path, Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)
        summarizer = Summarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)
        summary = extract(summarizer, sizes[eval])
        fout = open("baselines/EdmundsonSummarizer/en/" + eval[:-9] + ".txt", "w")
        fout.write(summary)
        fout.close()
开发者ID:kariminf,项目名称:AllSummarizer,代码行数:32,代码来源:base_gen_sumy.py

示例15: len

# 需要导入模块: from sumy.parsers.plaintext import PlaintextParser [as 别名]
# 或者: from sumy.parsers.plaintext.PlaintextParser import from_file [as 别名]
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer #We're choosing Lexrank, other algorithms are also built in
from sumy.summarizers.lsa import LsaSummarizer
from unidecode import unidecode
from wikisum.wikisum import Crawler
import RAKE
import sys

if len(sys.argv) != 3:
    raise StandardError("usage: python summarize.py filename.txt num_sentences")

crawl = Crawler()
file_name = sys.argv[1] #name of the plain-text file
num_sentences = int(sys.argv[2])

parser = PlaintextParser.from_file(file_name, Tokenizer("english"))

wordCount = 0

for paragraph in parser.document.paragraphs:
    for sentence in paragraph.sentences:
        for word in sentence.words:
            wordCount += 1

results = {"LsaSummary":"", "LexRankSummary":""};

# LSA SUMMARY
summarizer = LsaSummarizer()
summary = summarizer(parser.document, num_sentences)

for sentence in summary:
开发者ID:TPeterW,项目名称:summariser,代码行数:33,代码来源:summarize.py


注:本文中的sumy.parsers.plaintext.PlaintextParser.from_file方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。