

Python tag.Mecab Method Code Examples

This article collects typical usage examples of konlpy.tag.Mecab in Python. If you are wondering how tag.Mecab is used in practice, what it does, or what calling it looks like, the curated examples below may help. You can also browse further usage examples from the containing module, konlpy.tag.


The following presents 8 code examples of tag.Mecab, sorted by popularity by default.
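
Before the examples, here is a minimal usage sketch of konlpy.tag.Mecab itself, assuming MeCab and the mecab-ko-dic dictionary are already installed on the system:

from konlpy.tag import Mecab

mecab = Mecab()  # uses the default mecab-ko-dic dictionary path
print(mecab.morphs('아버지가 방에 들어가신다'))  # morpheme segmentation
print(mecab.pos('아버지가 방에 들어가신다'))    # (morpheme, POS tag) pairs
print(mecab.nouns('아버지가 방에 들어가신다'))  # nouns only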

Example 1: get_tokenizer

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
from konlpy.tag import Komoran, Okt, Mecab, Hannanum, Kkma
from khaiii import KhaiiiApi  # Kakao's khaiii analyzer, used by the "khaiii" branch

def get_tokenizer(tokenizer_name):
    if tokenizer_name == "komoran":
        tokenizer = Komoran()
    elif tokenizer_name == "okt":
        tokenizer = Okt()
    elif tokenizer_name == "mecab":
        tokenizer = Mecab()
    elif tokenizer_name == "hannanum":
        tokenizer = Hannanum()
    elif tokenizer_name == "kkma":
        tokenizer = Kkma()
    elif tokenizer_name == "khaiii":
        tokenizer = KhaiiiApi()
    else:
        tokenizer = Mecab()
    return tokenizer 
Author: ratsgo, Project: embedding, Lines: 18, Source: supervised_nlputils.py
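
A possible call of get_tokenizer, shown as a sketch (the sample sentence is arbitrary):

tokenizer = get_tokenizer('mecab')
tokens = tokenizer.morphs('아버지가 방에 들어가신다')  # list of morpheme strings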

Example 2: morphTag

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def morphTag(in_fname, out_fname):
    mec = Mecab()
    corpus = readfileUTF8(in_fname)
    concat_sent = []
    for n in range(0, len(corpus)):
        tagged = mec.pos(corpus[n])
        concat = ''
        for m in range(0, len(tagged)):
            if m < len(tagged) - 1:
                concat = concat + tagged[m][0] + '/' + tagged[m][1] + ' '
            else:  # the final item: no trailing space
                concat = concat + tagged[m][0] + '/' + tagged[m][1]

        concat_sent.append(concat)
    writefile(concat_sent, out_fname)

    return concat_sent 
Author: scarletcho, Project: KoLM, Lines: 19, Source: tag.py
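
A hypothetical invocation: readfileUTF8 and writefile are helpers defined elsewhere in KoLM's codebase, and the file names below are placeholders:

tagged_sentences = morphTag('corpus_raw.txt', 'corpus_tagged.txt')
# each output line consists of space-separated 'morpheme/TAG' pairs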

Example 3: __init__

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def __init__(self):
        try:
            from konlpy.tag import Mecab
        except ImportError:
            raise ImportError(
                'Mecab is not installed. '
                'You can install Mecab with "sh scripts/install_mecab.sh". '
                'See the installation guide at https://github.com/lyeoni/prenlp/blob/master/scripts/install_mecab.sh or https://bitbucket.org/eunjeon/mecab-ko-dic/src')
        self.tokenizer = Mecab() 
Author: lyeoni, Project: prenlp, Lines: 11, Source: tokenizer.py
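
The deferred import means konlpy is only required when the tokenizer is actually constructed. A usage sketch, assuming the __init__ above belongs to a wrapper class (named MecabTokenizer here purely for illustration):

tokenizer = MecabTokenizer()  # hypothetical class hosting the __init__ above
tokens = tokenizer.tokenizer.morphs('아버지가 방에 들어가신다')  # underlying konlpy Mecab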

Example 4: get_mecab

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def get_mecab(self, dict_path):
        try:
            if dict_path:
                return Mecab(dict_path)  # custom dictionary path, e.g. for annotation
            else:
                return Mecab()
        except Exception:
            raise Exception(
                'Failed to initialize Mecab. If it is not installed, run: '
                'bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)'
            )
Author: Kyubyong, Project: g2pK, Lines: 12, Source: g2pk.py
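
The dict_path argument is forwarded to konlpy's Mecab constructor, whose first parameter (dicpath) points at a compiled MeCab dictionary. A sketch using the typical default dictionary location:

from konlpy.tag import Mecab

mecab = Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')  # custom dictionary path (example value)
print(mecab.pos('아버지가 방에 들어가신다'))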

Example 5: _mecab_ko

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def _mecab_ko(self, text):
        if self.word_tokenizer is None:
            from konlpy.tag import Mecab

            self.word_tokenizer = Mecab()

        return self.word_tokenizer.morphs(text) 
Author: naver, Project: claf, Lines: 9, Source: word.py
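
The None check implements lazy initialization: the konlpy import and the relatively expensive Mecab construction happen only on the first Korean tokenization call. A minimal standalone sketch of the same pattern (class name hypothetical):

class LazyMecab:
    def __init__(self):
        self._mecab = None

    def morphs(self, text):
        if self._mecab is None:  # construct the tagger on first use only
            from konlpy.tag import Mecab
            self._mecab = Mecab()
        return self._mecab.morphs(text)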

Example 6: main

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def main(config):
    print(config)
    
    list_of_tokens = []
    if config.is_tokenized:
        # Read tokens
        with open(config.corpus, 'r', encoding='utf8') as reader:
            for li, line in enumerate(reader):
                list_of_tokens += line.strip().split()
    else:
        # Select tokenizer (only 'mecab' is handled here; any other value
        # leaves `tokenizer` undefined and raises a NameError below)
        if config.tokenizer == 'mecab':
            from konlpy.tag import Mecab
            tokenizer = Tokenizer(tokenization_fn=Mecab().morphs)

        # Tokenization & read tokens
        with open(config.corpus, 'r', encoding='utf8') as reader:
            for li, line in enumerate(reader):
                list_of_tokens += tokenizer.tokenize(line.strip())

    # Build vocabulary                
    vocab = Vocab(list_of_tokens=list_of_tokens,
                  unk_token=config.unk_token,
                  pad_token=config.pad_token,
                  bos_token=config.bos_token,
                  eos_token=config.eos_token,
                  min_freq=config.min_freq,
                  lower=config.lower)
    vocab.build()
    print('Vocabulary size: ', len(vocab))

    # Save vocabulary
    with open(config.vocab, 'wb') as writer:
        pickle.dump(vocab, writer)
    print('Vocabulary saved to', config.vocab) 
Author: lyeoni, Project: pretraining-for-language-understanding, Lines: 37, Source: build_vocab.py
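
Tokenizer and Vocab are project-local classes, and pickle is imported at module level in the original script. A hypothetical config for a run over an untokenized Korean corpus (all field values are placeholders; the real script presumably parses these from the command line):

from types import SimpleNamespace

config = SimpleNamespace(
    corpus='corpus.ko.txt', is_tokenized=False, tokenizer='mecab',
    vocab='vocab.ko.pkl', unk_token='[UNK]', pad_token='[PAD]',
    bos_token='[BOS]', eos_token='[EOS]', min_freq=1, lower=False)
main(config)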

Example 7: __init__

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
from konlpy import tag as taggers  # the code below accesses konlpy.tag through the alias `taggers`

def __init__(self, tagger, useful_tags, delimiters, min_token_length, stopwords, **kwargs):
        if tagger == 'twitter':
            self.tagger = taggers.Twitter()
            self.tagger_options = {
                'norm': bool(kwargs.get('norm', True)),
                'stem': bool(kwargs.get('stem', True)),
            }
        elif tagger == 'komoran':
            self.tagger = taggers.Komoran()
            self.tagger_options = {
                'flatten': bool(kwargs.get('flatten', True)),
            }
        elif tagger == 'hannanum':
            self.tagger = taggers.Hannanum()
            self.tagger_options = {
                'ntags': int(kwargs.get('ntags', 9)),
                'flatten': bool(kwargs.get('flatten', True)),
            }
        elif tagger == 'kkma':
            self.tagger = taggers.Kkma()
            self.tagger_options = {
                'flatten': bool(kwargs.get('flatten', True)),
            }
        elif tagger == 'mecab':
            self.tagger = taggers.Mecab()
            self.tagger_options = {
                'flatten': bool(kwargs.get('flatten', True)),
            }
        else:
            raise LexRankError("available taggers are: twitter, komoran, hannanum, kkma, mecab")
        self.useful_tags = useful_tags
        self.delimiters = delimiters
        self.stopwords = stopwords
        self.min_token_length = min_token_length
        self.splitter = self.splitterer()
        self.pos = lambda text: self.tagger.pos(text, **self.tagger_options) 
Author: theeluwin, Project: lexrankr, Lines: 38, Source: lexrankr.py
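
A hypothetical construction, assuming this __init__ belongs to lexrankr's internal tokenizer class (named TextTokenizer here only for illustration); the tag set and delimiters are arbitrary example values:

tok = TextTokenizer(
    tagger='mecab',
    useful_tags=['NNG', 'NNP', 'VV', 'VA'],  # POS tags to keep (example values)
    delimiters=['. ', '\n'],
    min_token_length=2,
    stopwords=set())
pairs = tok.pos('맛있는 사과를 먹었다')  # (morpheme, tag) pairs via the stored lambda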

Example 8: load_tokenizer

# Required import: from konlpy import tag [as alias]
# Or: from konlpy.tag import Mecab [as alias]
def load_tokenizer(lang):
    if lang == "ko":
        from konlpy.tag import Mecab
        tokenizer = Mecab()
    elif lang == "ja":
        import Mykytea
        opt = "-model jp-0.4.7-1.mod"
        tokenizer = Mykytea.Mykytea(opt)
    elif lang == "zh_cn":
        import Mykytea
        opt = "-model ctb-0.4.0-1.mod"
        tokenizer = Mykytea.Mykytea(opt)
    elif lang == "zh_tw":
        import jieba
        tokenizer = jieba
    elif lang == "vi":
        from pyvi import ViTokenizer
        tokenizer = ViTokenizer
    elif lang == "th":
        from pythainlp.tokenize import word_tokenize
        tokenizer = word_tokenize
    elif lang == "ar":
        import pyarabic.araby as araby
        tokenizer = araby
    # elif lang=="en":
    #     from nltk import word_tokenize
    #     tokenizer = word_tokenize
    else:
        from nltk.tokenize import ToktokTokenizer
        tokenizer = ToktokTokenizer()

    return tokenizer 
Author: kakaobrain, Project: word2word, Lines: 34, Source: tokenization.py
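
Note that the returned object differs per language: for "ko" it is a konlpy Mecab instance, while for "vi" or "zh_tw" it is a module. A sketch for Korean:

tokenizer = load_tokenizer('ko')
tokens = tokenizer.morphs('아버지가 방에 들어가신다')  # list of Korean morphemes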


Note: The konlpy.tag.Mecab examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors, and copyright in the source code remains with the original authors; for distribution and use, refer to the license of the corresponding project. Do not republish without permission.