本文整理匯總了Python中result.Result.accumulate方法的典型用法代碼示例。如果您正苦於以下問題:Python Result.accumulate方法的具體用法?Python Result.accumulate怎麼用?Python Result.accumulate使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類result.Result的用法示例。
在下文中一共展示了Result.accumulate方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: process_file
# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_file(self, file_path, file_encoding='utf-8-sig'):
    """Analyse a text file and return its tag counts and statistics.

    The file is read line by line and blank lines are skipped.  Each line
    is scanned for numerals, @-mentions, emotions, hashtags and URLs, then
    segmented with ``nlpir.Seg``; every non-separator term is looked up
    with ``get_term_tags`` and each returned tag is accumulated.

    Args:
        file_path: Path of the file to read.
        file_encoding: Encoding handed to ``codecs.open``.

    Returns:
        A ``Result`` holding one count per dictionary tag, ``POS/<tag>``
        counts when ``self.enablePOS`` is true, and the ``stat/*`` values.
    """
    r = Result()
    # Starts at 1 and only ever grows, so it is a safe divisor below.
    n_sentence_total_number = 1
    n_term_total_number = 0
    n_term_numerals = 0
    n_term_in_dic = 0
    n_term_len_gte6 = 0
    n_term_len_gte4 = 0
    n_term_latin = 0
    n_term_at_mention = 0
    n_term_emotion = 0
    n_term_hashtag = 0
    n_term_url = 0
    with codecs.open(file_path, 'r', encoding=file_encoding) as fp:
        for line in fp:
            # The pattern finders and the segmenter are fed UTF-8 bytes.
            line = line.strip(' \t\r\n').encode('utf-8')
            if len(line) < 1:
                continue
            n_term_numerals += len(find_numeral(line))
            n_term_at_mention += len(find_at_mention(line))
            n_term_emotion += len(find_emotions(line))
            n_term_hashtag += len(find_hashtag(line))
            # Fixed: URL hits used to be added to the emotion counter,
            # leaving 'stat/URLs' permanently at zero.
            n_term_url += len(find_url(line))
            for t in nlpir.Seg(line):
                term = t[0].decode('utf-8', 'ignore')
                POS = t[1]
                n_term_total_number += 1
                if is_sentence_separator(term):
                    # A sentence separator bumps the sentence count.
                    n_sentence_total_number += 1
                else:
                    if is_latin(term):
                        n_term_latin += 1
                    if len(term) >= 6:
                        n_term_len_gte6 += 1
                    if len(term) >= 4:
                        n_term_len_gte4 += 1
                    tags = get_term_tags(term)
                    if len(tags) > 0:
                        n_term_in_dic += 1
                        for tag in tags:
                            r.accumulate(tag)
                    # NOTE(review): the extracted source lost its
                    # indentation; POS counting is assumed to apply to
                    # non-separator terms only -- confirm upstream.
                    if self.enablePOS:
                        r.accumulate('POS/%s' % POS)
    r.accumulate('stat/WordCount', value=n_term_total_number)
    # With no terms the ratios become NaN instead of raising
    # ZeroDivisionError.
    if n_term_total_number == 0:
        n_term_total_number = float('NaN')
    # Fixed: force true division; plain int / int truncates on Python 2.
    n_terms = float(n_term_total_number)
    r.accumulate('stat/WordPerSentence', value=n_terms / n_sentence_total_number)
    r.accumulate('stat/DicCoverRate', value=n_term_in_dic / n_terms)
    r.accumulate('stat/Numerals', value=n_term_numerals / n_terms)
    r.accumulate('stat/SixLtr', value=n_term_len_gte6 / n_terms)
    r.accumulate('stat/FourCharWord', value=n_term_len_gte4 / n_terms)
    r.accumulate('stat/Latin', value=n_term_latin / n_terms)
    r.accumulate('stat/AtMention', value=n_term_at_mention)
    r.accumulate('stat/Emotion', value=n_term_emotion)
    r.accumulate('stat/HashTag', value=n_term_hashtag)
    r.accumulate('stat/URLs', value=n_term_url)
    return r
示例2: process_iterator
# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_iterator(lst_original, lst_segged, to_ration=True, enable_pos=False, encoding='utf-8'):
    """Process a list of paragraphs alongside their pre-segmented form.

    Args:
        lst_original: Sequence of raw paragraphs; each is stripped and
            scanned for numerals, @-mentions, emotions, hashtags and URLs.
        lst_segged: Sequence of the same length, iterated in lockstep with
            ``lst_original``; each item is parsed with ``default_seg``
            (presumably the segmentation of the matching paragraph --
            confirm against the caller).
        to_ration: Forwarded unchanged to ``Result.to_list``.  The spelling
            is kept so existing keyword callers keep working.
        enable_pos: When true, also accumulate a ``POS/<tag>`` count.
        encoding: Encoding used to decode each segmented term.

    Returns:
        Whatever ``Result.to_list(to_ration)`` returns.

    Raises:
        ValueError: If the two input sequences differ in length.
    """
    if len(lst_original) != len(lst_segged):
        raise ValueError('The length of lst_original and lst_segged should be equal!')
    r = Result()
    # Starts at 1 and only ever grows, so it is a safe divisor below.
    n_sentence_total_number = 1
    n_term_total_number = 0
    n_term_numerals = 0
    n_term_in_dic = 0
    n_term_len_gte6 = 0
    n_term_len_gte4 = 0
    n_term_latin = 0
    n_term_at_mention = 0
    n_term_emotion = 0
    n_term_hashtag = 0
    n_term_url = 0
    for line, seg_str in zip(lst_original, lst_segged):
        line = line.strip(' \t\r\n')
        if len(line) < 1:
            # A blank paragraph is skipped together with its segmentation.
            continue
        n_term_numerals += len(find_numeral(line))
        n_term_at_mention += len(find_at_mention(line))
        n_term_emotion += len(find_emotions(line))
        n_term_hashtag += len(find_hashtag(line))
        n_term_url += len(find_url(line))
        segged_terms = default_seg(seg_str)
        for t in segged_terms:
            term = t[0].decode(encoding, 'ignore')
            pos = t[1]
            n_term_total_number += 1
            if is_sentence_separator(term):
                # A sentence separator bumps the sentence count.
                n_sentence_total_number += 1
            else:
                if is_latin(term):
                    n_term_latin += 1
                if len(term) >= 6:
                    n_term_len_gte6 += 1
                if len(term) >= 4:
                    n_term_len_gte4 += 1
                tags = get_term_tags(term)
                if len(tags) > 0:
                    n_term_in_dic += 1
                    for tag in tags:
                        r.accumulate(tag)
                # NOTE(review): the extracted source lost its indentation;
                # POS counting is assumed to apply to non-separator terms
                # only -- confirm upstream.
                if enable_pos:
                    r.accumulate('POS/%s' % pos)
    r.accumulate('stat/WordCount', value=n_term_total_number)
    # With no terms the ratios become NaN instead of raising
    # ZeroDivisionError.
    if n_term_total_number == 0:
        n_term_total_number = float('NaN')
    r.accumulate('stat/WordPerSentence', value=float(n_term_total_number) / n_sentence_total_number)
    r.accumulate('stat/RateDicCover', value=float(n_term_in_dic) / n_term_total_number)
    r.accumulate('stat/RateNumeral', value=float(n_term_numerals) / n_term_total_number)
    r.accumulate('stat/RateSixLtrWord', value=float(n_term_len_gte6) / n_term_total_number)
    r.accumulate('stat/RateFourCharWord', value=float(n_term_len_gte4) / n_term_total_number)
    r.accumulate('stat/RateLatinWord', value=float(n_term_latin) / n_term_total_number)
    r.accumulate('stat/NumAtMention', value=n_term_at_mention)
    r.accumulate('stat/NumEmotion', value=n_term_emotion)
    r.accumulate('stat/NumHashTag', value=n_term_hashtag)
    r.accumulate('stat/NumURLs', value=n_term_url)
    # Fixed: this used to read the undefined name ``to_ratio`` and raised
    # NameError on every call; the parameter is spelled ``to_ration``.
    return r.to_list(to_ration)
示例3: process_paragraph
# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_paragraph(self, paragraph, encoding='utf-8'):
    """Analyse one paragraph and return its tag counts and statistics.

    The pattern-based counts (numerals, @-mentions, emotions, hashtags,
    URLs) are taken from the whole paragraph; the paragraph is then
    segmented with ``nlpir.Seg`` and every non-separator term is looked up
    with ``get_term_tags``.

    Args:
        paragraph: Text passed to the pattern finders and ``nlpir.Seg``.
        encoding: NOTE(review): accepted but not used -- terms are always
            decoded as UTF-8 here; confirm whether that is intended.

    Returns:
        A ``Result`` holding one count per dictionary tag, ``POS/<tag>``
        counts when ``self.enablePOS`` is true, and the ``stat/*`` values.
    """
    result = Result()
    numeral_hits = len(find_numeral(paragraph))
    mention_hits = len(find_at_mention(paragraph))
    emotion_hits = len(find_emotions(paragraph))
    hashtag_hits = len(find_hashtag(paragraph))
    url_hits = len(find_url(paragraph))

    sentences = 1  # starts at 1, so it is never a zero divisor
    words = 0
    in_dictionary = 0
    six_plus = 0
    four_plus = 0
    latin = 0

    for token in nlpir.Seg(paragraph):
        word = token[0].decode('utf-8', 'ignore')
        pos_tag = token[1]
        words += 1

        # A sentence separator only bumps the sentence count.
        if is_sentence_separator(word):
            sentences += 1
            continue

        if is_latin(word):
            latin += 1
        word_length = len(word)
        if word_length >= 6:
            six_plus += 1
        if word_length >= 4:
            four_plus += 1

        matched_tags = get_term_tags(word)
        if len(matched_tags) > 0:
            in_dictionary += 1
            for matched in matched_tags:
                result.accumulate(matched)

        # NOTE(review): the extracted source lost its indentation; POS
        # counting is assumed to apply to non-separator terms only --
        # confirm upstream.
        if self.enablePOS:
            result.accumulate('POS/%s' % pos_tag)

    result.accumulate('stat/WordCount', value=words)

    # With no terms the ratios become NaN instead of raising
    # ZeroDivisionError.
    denominator = float('NaN') if words == 0 else words

    def rate(count):
        return float(count) / denominator

    result.accumulate('stat/WordPerSentence', value=float(denominator) / sentences)
    result.accumulate('stat/RateDicCover', value=rate(in_dictionary))
    result.accumulate('stat/RateNumeral', value=rate(numeral_hits))
    result.accumulate('stat/RateSixLtrWord', value=rate(six_plus))
    result.accumulate('stat/RateFourCharWord', value=rate(four_plus))
    result.accumulate('stat/RateLatinWord', value=rate(latin))
    result.accumulate('stat/NumAtMention', value=mention_hits)
    result.accumulate('stat/NumEmotion', value=emotion_hits)
    result.accumulate('stat/NumHashTag', value=hashtag_hits)
    result.accumulate('stat/NumURLs', value=url_hits)
    return result
示例4: process_iterator
# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_iterator(iterator, segmentor=default_seg, enable_pos=True, encoding='utf-8'):
    """Analyse every line of an iterable and return the combined result.

    Args:
        iterator: Iterable of text lines; lines that are empty after
            stripping spaces, tabs and line breaks are skipped.
        segmentor: Callable applied to each stripped line; it must yield
            ``(term, pos)`` pairs.
        enable_pos: When true, also accumulate a ``POS/<tag>`` count.
        encoding: Encoding used to decode each segmented term.

    Returns:
        A ``Result`` holding one count per dictionary tag, the optional
        ``POS/<tag>`` counts, and the ``stat/*`` values.
    """
    result = Result()
    sentences = 1  # starts at 1, so it is never a zero divisor
    words = 0
    counts = {
        'numeral': 0,
        'in_dic': 0,
        'len_gte6': 0,
        'len_gte4': 0,
        'latin': 0,
        'at_mention': 0,
        'emotion': 0,
        'hashtag': 0,
        'url': 0,
    }

    for raw_line in iterator:
        text = raw_line.strip(' \t\r\n')
        if not text:
            continue

        counts['numeral'] += len(find_numeral(text))
        counts['at_mention'] += len(find_at_mention(text))
        counts['emotion'] += len(find_emotions(text))
        counts['hashtag'] += len(find_hashtag(text))
        counts['url'] += len(find_url(text))

        for raw_term, pos_tag in segmentor(text):
            word = raw_term.decode(encoding, 'ignore')
            words += 1

            # A sentence separator only bumps the sentence count.
            if is_sentence_separator(word):
                sentences += 1
                continue

            if is_latin(word):
                counts['latin'] += 1
            if len(word) >= 6:
                counts['len_gte6'] += 1
            if len(word) >= 4:
                counts['len_gte4'] += 1

            matched_tags = get_term_tags(word)
            if len(matched_tags) > 0:
                counts['in_dic'] += 1
                for matched in matched_tags:
                    result.accumulate(matched)

            # NOTE(review): the extracted source lost its indentation; POS
            # counting is assumed to apply to non-separator terms only --
            # confirm upstream.
            if enable_pos:
                result.accumulate('POS/%s' % pos_tag)

    result.accumulate('stat/WordCount', value=words)

    # With no terms the ratios become NaN instead of raising
    # ZeroDivisionError.
    if words == 0:
        words = float('NaN')

    summary = (
        ('stat/WordPerSentence', float(words) / sentences),
        ('stat/RateDicCover', float(counts['in_dic']) / words),
        ('stat/RateNumeral', float(counts['numeral']) / words),
        ('stat/RateSixLtrWord', float(counts['len_gte6']) / words),
        ('stat/RateFourCharWord', float(counts['len_gte4']) / words),
        ('stat/RateLatinWord', float(counts['latin']) / words),
        ('stat/NumAtMention', counts['at_mention']),
        ('stat/NumEmotion', counts['emotion']),
        ('stat/NumHashTag', counts['hashtag']),
        ('stat/NumURLs', counts['url']),
    )
    for key, stat_value in summary:
        result.accumulate(key, value=stat_value)
    return result