當前位置: 首頁>>代碼示例>>Python>>正文


Python Result.accumulate方法代碼示例

本文整理匯總了Python中result.Result.accumulate方法的典型用法代碼示例。如果您正苦於以下問題:Python Result.accumulate方法的具體用法?Python Result.accumulate怎麼用?Python Result.accumulate使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在result.Result的用法示例。


在下文中一共展示了Result.accumulate方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: process_file

# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
    def process_file(self, file_path, file_encoding='utf-8-sig'):
        """Compute term and tag statistics for a text file.

        The file is read line by line. Each line is stripped of surrounding
        spaces, tabs and line breaks, re-encoded as UTF-8, scanned with the
        ``find_*`` helpers and segmented with ``nlpir.Seg``; lines that are
        empty after stripping are skipped.

        :param file_path: path of the file to analyse.
        :param file_encoding: encoding used to decode the file
            (default ``'utf-8-sig'``).
        :return: a ``Result`` that received one ``accumulate(tag)`` call per
            tag returned by ``get_term_tags`` for every term, a ``POS/<pos>``
            entry per term when ``self.enablePOS`` is truthy, and the
            ``stat/*`` summary values.
        """
        r = Result()

        # The sentence count starts at 1; every separator term adds one more.
        n_sentence_total_number = 1
        n_term_total_number = 0
        n_term_numerals = 0
        n_term_in_dic = 0
        n_term_len_gte6 = 0

        n_term_len_gte4 = 0
        n_term_latin = 0

        n_term_at_mention = 0
        n_term_emotion = 0
        n_term_hashtag = 0
        n_term_url = 0

        with codecs.open(file_path, 'r', encoding=file_encoding) as fp:
            for line in fp:
                line = line.strip(' \t\r\n').encode('utf-8')
                if not line:
                    continue

                n_term_numerals += len(find_numeral(line))
                n_term_at_mention += len(find_at_mention(line))
                n_term_emotion += len(find_emotions(line))
                n_term_hashtag += len(find_hashtag(line))
                # URL matches go to the URL counter. They used to be added to
                # the emotion counter, which left stat/URLs stuck at 0.
                n_term_url += len(find_url(line))

                for t in nlpir.Seg(line):
                    term = t[0].decode('utf-8', 'ignore')
                    pos = t[1]

                    n_term_total_number += 1

                    if is_sentence_separator(term):
                        # A sentence separator starts a new sentence.
                        n_sentence_total_number += 1
                    else:
                        if is_latin(term):
                            n_term_latin += 1
                        if len(term) >= 6:
                            n_term_len_gte6 += 1
                        if len(term) >= 4:
                            n_term_len_gte4 += 1

                    tags = get_term_tags(term)
                    if len(tags) > 0:
                        n_term_in_dic += 1
                        for tag in tags:
                            r.accumulate(tag)

                    if self.enablePOS:
                        r.accumulate('POS/%s' % pos)

        r.accumulate('stat/WordCount', value=n_term_total_number)
        # With no terms at all, a NaN denominator makes every ratio NaN
        # instead of raising ZeroDivisionError.
        if n_term_total_number == 0:
            n_term_total_number = float('NaN')

        # float() forces true division; int / int would floor on Python 2
        # unless true division is enabled for the module.
        # NOTE(review): these stat keys differ from the 'stat/Rate*' and
        # 'stat/Num*' names used by process_paragraph; left unchanged here
        # because consumers may depend on them.
        r.accumulate('stat/WordPerSentence', value=float(n_term_total_number) / n_sentence_total_number)
        r.accumulate('stat/DicCoverRate', value=float(n_term_in_dic) / n_term_total_number)
        r.accumulate('stat/Numerals', value=float(n_term_numerals) / n_term_total_number)
        r.accumulate('stat/SixLtr', value=float(n_term_len_gte6) / n_term_total_number)

        r.accumulate('stat/FourCharWord', value=float(n_term_len_gte4) / n_term_total_number)
        r.accumulate('stat/Latin', value=float(n_term_latin) / n_term_total_number)

        r.accumulate('stat/AtMention', value=n_term_at_mention)
        r.accumulate('stat/Emotion', value=n_term_emotion)
        r.accumulate('stat/HashTag', value=n_term_hashtag)
        r.accumulate('stat/URLs', value=n_term_url)

        return r
開發者ID:zhang-zhan,項目名稱:miner,代碼行數:69,代碼來源:wenxin.py

示例2: process_iterator

# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_iterator(lst_original, lst_segged, to_ration=True, enable_pos=False, encoding='utf-8'):
    """Compute term and tag statistics for pre-segmented paragraphs.

    ``lst_original`` and ``lst_segged`` are walked in lockstep. The original
    text of each paragraph is scanned with the ``find_*`` helpers, while the
    matching segmented string is parsed with ``default_seg`` into
    ``(term, pos)``-like items. Paragraphs whose original text is empty after
    stripping spaces, tabs and line breaks are skipped.

    :param lst_original: sequence of original paragraph strings.
    :param lst_segged: sequence of segmented strings, same length as
        ``lst_original``.
    :param to_ration: forwarded unchanged to ``Result.to_list``.
    :param enable_pos: when truthy, also accumulate a ``POS/<pos>`` entry for
        every term.
    :param encoding: encoding used to decode each segmented term.
    :return: whatever ``Result.to_list(to_ration)`` returns.
    :raises ValueError: if the two input sequences differ in length.
    """
    if len(lst_original) != len(lst_segged):
        raise ValueError('The length of lst_original and lst_segged should be equal!')

    r = Result()

    # The sentence count starts at 1; every separator term adds one more.
    n_sentence_total_number = 1
    n_term_total_number = 0
    n_term_numerals = 0
    n_term_in_dic = 0
    n_term_len_gte6 = 0

    n_term_len_gte4 = 0
    n_term_latin = 0

    n_term_at_mention = 0
    n_term_emotion = 0
    n_term_hashtag = 0
    n_term_url = 0

    for line, seg_str in zip(lst_original, lst_segged):
        line = line.strip(' \t\r\n')
        if not line:
            continue

        n_term_numerals += len(find_numeral(line))
        n_term_at_mention += len(find_at_mention(line))
        n_term_emotion += len(find_emotions(line))
        n_term_hashtag += len(find_hashtag(line))
        n_term_url += len(find_url(line))

        for t in default_seg(seg_str):
            term = t[0].decode(encoding, 'ignore')
            pos = t[1]

            n_term_total_number += 1

            if is_sentence_separator(term):
                # A sentence separator starts a new sentence.
                n_sentence_total_number += 1
            else:
                if is_latin(term):
                    n_term_latin += 1
                if len(term) >= 6:
                    n_term_len_gte6 += 1
                if len(term) >= 4:
                    n_term_len_gte4 += 1

            tags = get_term_tags(term)
            if len(tags) > 0:
                n_term_in_dic += 1
                for tag in tags:
                    r.accumulate(tag)

            if enable_pos:
                r.accumulate('POS/%s' % pos)

    r.accumulate('stat/WordCount', value=n_term_total_number)
    # With no terms at all, a NaN denominator makes every ratio NaN instead
    # of raising ZeroDivisionError.
    if n_term_total_number == 0:
        n_term_total_number = float('NaN')

    r.accumulate('stat/WordPerSentence', value=float(n_term_total_number) / n_sentence_total_number)
    r.accumulate('stat/RateDicCover', value=float(n_term_in_dic) / n_term_total_number)
    r.accumulate('stat/RateNumeral', value=float(n_term_numerals) / n_term_total_number)
    r.accumulate('stat/RateSixLtrWord', value=float(n_term_len_gte6) / n_term_total_number)

    r.accumulate('stat/RateFourCharWord', value=float(n_term_len_gte4) / n_term_total_number)
    r.accumulate('stat/RateLatinWord', value=float(n_term_latin) / n_term_total_number)

    r.accumulate('stat/NumAtMention', value=n_term_at_mention)
    r.accumulate('stat/NumEmotion', value=n_term_emotion)
    r.accumulate('stat/NumHashTag', value=n_term_hashtag)
    r.accumulate('stat/NumURLs', value=n_term_url)

    # Use the declared parameter name; the body previously read an undefined
    # name `to_ratio`, which raised NameError here.
    return r.to_list(to_ration)
開發者ID:CCPLab,項目名稱:miner,代碼行數:74,代碼來源:wenxin_noseg.py

示例3: process_paragraph

# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
    def process_paragraph(self, paragraph, encoding='utf-8'):
        """Compute term and tag statistics for a single paragraph.

        The paragraph is scanned once with each ``find_*`` helper and then
        segmented with ``nlpir.Seg``; every segmented term is decoded as
        UTF-8 (undecodable bytes ignored) before being classified.

        :param paragraph: text to analyse, passed as-is to the helpers and to
            ``nlpir.Seg``.
        :param encoding: accepted but not used by this method; terms are
            always decoded as UTF-8.
        :return: a ``Result`` holding the accumulated dictionary tags,
            optional ``POS/<pos>`` entries (when ``self.enablePOS`` is
            truthy) and the ``stat/*`` summary values.
        """
        result = Result()

        # Whole-paragraph pattern counts.
        numeral_count = len(find_numeral(paragraph))
        mention_count = len(find_at_mention(paragraph))
        emotion_count = len(find_emotions(paragraph))
        hashtag_count = len(find_hashtag(paragraph))
        url_count = len(find_url(paragraph))

        # Per-term counters; the sentence count starts at 1.
        sentence_count = 1
        term_count = 0
        in_dict_count = 0
        len6_count = 0
        len4_count = 0
        latin_count = 0

        for seg_item in nlpir.Seg(paragraph):
            word = seg_item[0].decode('utf-8', 'ignore')
            pos_tag = seg_item[1]
            term_count += 1

            if not is_sentence_separator(word):
                if is_latin(word):
                    latin_count += 1
                word_len = len(word)
                if word_len >= 6:
                    len6_count += 1
                if word_len >= 4:
                    len4_count += 1
            else:
                # A sentence separator starts a new sentence.
                sentence_count += 1

            word_tags = get_term_tags(word)
            if len(word_tags) > 0:
                in_dict_count += 1
                for tag in word_tags:
                    result.accumulate(tag)

            if self.enablePOS:
                result.accumulate('POS/%s' % pos_tag)

        result.accumulate('stat/WordCount', value=term_count)

        # With no terms, a NaN denominator turns every ratio into NaN rather
        # than raising ZeroDivisionError.
        denominator = float('NaN') if term_count == 0 else term_count

        summary = (
            ('stat/WordPerSentence', float(denominator) / sentence_count),
            ('stat/RateDicCover', float(in_dict_count) / denominator),
            ('stat/RateNumeral', float(numeral_count) / denominator),
            ('stat/RateSixLtrWord', float(len6_count) / denominator),
            ('stat/RateFourCharWord', float(len4_count) / denominator),
            ('stat/RateLatinWord', float(latin_count) / denominator),
            ('stat/NumAtMention', mention_count),
            ('stat/NumEmotion', emotion_count),
            ('stat/NumHashTag', hashtag_count),
            ('stat/NumURLs', url_count),
        )
        for stat_key, stat_value in summary:
            result.accumulate(stat_key, value=stat_value)

        return result
開發者ID:zhang-zhan,項目名稱:miner,代碼行數:58,代碼來源:wenxin.py

示例4: process_iterator

# 需要導入模塊: from result import Result [as 別名]
# 或者: from result.Result import accumulate [as 別名]
def process_iterator(iterator, segmentor=default_seg, enable_pos=True, encoding='utf-8'):
    """Compute term and tag statistics over an iterable of text lines.

    Each line is stripped of surrounding spaces, tabs and line breaks; empty
    lines are skipped. The remaining lines are scanned with the ``find_*``
    helpers and split into ``(term, pos)`` pairs by ``segmentor``.

    :param iterator: iterable yielding text lines.
    :param segmentor: callable that turns a line into an iterable of
        ``(term, pos)`` pairs (default ``default_seg``).
    :param enable_pos: when truthy, also accumulate a ``POS/<pos>`` entry for
        every term.
    :param encoding: encoding used to decode each term (undecodable bytes
        are ignored).
    :return: a ``Result`` holding the accumulated dictionary tags, optional
        POS entries and the ``stat/*`` summary values.
    """
    result = Result()

    # The sentence count starts at 1; every separator term adds one more.
    sentence_count = 1
    term_count = 0
    numeral_count = 0
    in_dict_count = 0
    len6_count = 0
    len4_count = 0
    latin_count = 0
    mention_count = 0
    emotion_count = 0
    hashtag_count = 0
    url_count = 0

    for raw_line in iterator:
        text = raw_line.strip(' \t\r\n')
        if len(text) < 1:
            continue

        numeral_count += len(find_numeral(text))
        mention_count += len(find_at_mention(text))
        emotion_count += len(find_emotions(text))
        hashtag_count += len(find_hashtag(text))
        url_count += len(find_url(text))

        for raw_term, pos_tag in segmentor(text):
            word = raw_term.decode(encoding, 'ignore')
            term_count += 1

            if not is_sentence_separator(word):
                if is_latin(word):
                    latin_count += 1
                word_len = len(word)
                if word_len >= 6:
                    len6_count += 1
                if word_len >= 4:
                    len4_count += 1
            else:
                # A sentence separator starts a new sentence.
                sentence_count += 1

            word_tags = get_term_tags(word)
            if len(word_tags) > 0:
                in_dict_count += 1
                for tag in word_tags:
                    result.accumulate(tag)

            if enable_pos:
                result.accumulate('POS/%s' % pos_tag)

    result.accumulate('stat/WordCount', value=term_count)

    # With no terms, a NaN denominator turns every ratio into NaN rather than
    # raising ZeroDivisionError.
    denominator = float('NaN') if term_count == 0 else term_count

    summary = (
        ('stat/WordPerSentence', float(denominator) / sentence_count),
        ('stat/RateDicCover', float(in_dict_count) / denominator),
        ('stat/RateNumeral', float(numeral_count) / denominator),
        ('stat/RateSixLtrWord', float(len6_count) / denominator),
        ('stat/RateFourCharWord', float(len4_count) / denominator),
        ('stat/RateLatinWord', float(latin_count) / denominator),
        ('stat/NumAtMention', mention_count),
        ('stat/NumEmotion', emotion_count),
        ('stat/NumHashTag', hashtag_count),
        ('stat/NumURLs', url_count),
    )
    for stat_key, stat_value in summary:
        result.accumulate(stat_key, value=stat_value)

    return result
開發者ID:CCPLab,項目名稱:miner,代碼行數:69,代碼來源:wenxin.py


注:本文中的result.Result.accumulate方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。