Python jieba.add_word Method Code Examples

This article collects typical usage examples of the Python method jieba.add_word. If you are wondering what jieba.add_word does, how to call it, or how it is used in practice, the curated code examples below may help. You can also explore further usage examples from the jieba module, to which this method belongs.


The following shows 13 code examples of the jieba.add_word method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
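
Before the examples, a minimal sketch of what jieba.add_word does: it registers a word in jieba's in-memory dictionary so the segmenter keeps it as a single token; the optional freq and tag arguments set its frequency weight and part-of-speech tag. The sample sentence is illustrative:

import jieba

sentence = "自然語言處理很有趣"
print(jieba.lcut(sentence))     # without the entry, the phrase may be split apart
jieba.add_word("自然語言處理")    # register the phrase as one token
print(jieba.lcut(sentence))     # the phrase now comes out as a single token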

Example 1: segment

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def segment(self, sentence, entity_postag=dict()):
        """Segment a sentence with jieba (the original NLPIR calls are kept below as comments).
        Args:
            sentence: string, the sentence to segment
            entity_postag: dict, entity POS dictionary; empty by default, built while
                analyzing the structured text of each case
        Returns:
            lemmas: list, the segmentation result
        """
        # Register the entity dictionary
        if entity_postag:
            for entity in entity_postag:
                # pynlpir.nlpir.AddUserWord(c_char_p(entity.encode()))
                jieba.add_word(entity)
        # pynlpir.nlpir.AddUserWord(c_char_p('前任'.encode()))  # adding a single user word
        # pynlpir.nlpir.AddUserWord(c_char_p('習近平'.encode()))  # adding a single user word
        # Segment without POS tagging
        # lemmas = pynlpir.segment(sentence, pos_tagging=False)
        lemmas = jieba.lcut(sentence)
        # pynlpir.close()  # release resources
        return lemmas 
Author: lemonhu, Project: open-entity-relation-extraction, Lines: 22, Source: nlp.py
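
A hypothetical call of the method above, assuming the enclosing class (which the snippet does not show) is instantiated as nlp; the sentence and entity are illustrative:

nlp = NLP()  # the class name NLP is an assumption
lemmas = nlp.segment("小明在北京大學讀書", entity_postag={"北京大學": "nt"})
print(lemmas)  # "北京大學" stays whole because it was registered via jieba.add_word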

Example 2: jiebaCustomSetting

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def jiebaCustomSetting(self, dict_path, usr_dict_path):

        jieba.set_dictionary(dict_path)  # swap in a custom main dictionary
        with open(usr_dict_path, 'r', encoding='utf-8') as dic:
            for word in dic:
                jieba.add_word(word.strip('\n'))  # one user word per line
Author: zake7749, Project: Chatbot, Lines: 8, Source: matcher.py
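
For comparison, jieba ships a built-in loader for this kind of file: jieba.load_userdict accepts a path (or file object) whose lines have the form "word [freq] [tag]", with freq and tag optional and space-separated, so the loop above can be replaced by a single call. A sketch with placeholder paths:

import jieba

jieba.set_dictionary("dict.txt.big")  # placeholder main dictionary path
jieba.load_userdict("usr_dict.txt")   # placeholder user dictionary path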

Example 3: __init__

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def __init__(self, user_dict_dir=default_user_dict_dir, model_dir=default_model_dir):
        self.default_user_dict_dir = user_dict_dir
        self.default_model_dir = model_dir
        # Initialize the segmenter
        # pynlpir.open()  # initialize the NLPIR segmenter
        # Load the user dictionaries (a legal-document dictionary and the Tsinghua
        # legal lexicon); adding entries in memory this way is faster
        files = os.listdir(user_dict_dir)
        for file in files:
            file_path = os.path.join(user_dict_dir, file)
            # Skip directories (note: the check must use the full path, not the bare name)
            if os.path.isdir(file_path):
                continue
            with open(file_path, 'r', encoding='utf-8') as f:
                line = f.readline()
                while line:
                    word = line.strip('\n').strip()
                    jieba.add_word(word)
                    # print(c_char_p(word.encode()))
                    # pynlpir.nlpir.AddUserWord(c_char_p(word.encode()))
                    line = f.readline()

        # Load the LTP models
        # Part-of-speech tagging model
        self.postagger = Postagger()
        postag_flag = self.postagger.load(os.path.join(self.default_model_dir, 'pos.model'))
        # Named-entity recognition model
        self.recognizer = NamedEntityRecognizer()
        ner_flag = self.recognizer.load(os.path.join(self.default_model_dir, 'ner.model'))
        # Dependency parsing model
        self.parser = Parser()
        parse_flag = self.parser.load(os.path.join(self.default_model_dir, 'parser.model'))

        # load() is expected to return 0 on success, so any truthy flag means a failure
        if postag_flag or ner_flag or parse_flag:
            print('load model failed!') 
Author: lemonhu, Project: open-entity-relation-extraction, Lines: 36, Source: nlp.py
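
A hypothetical follow-up, assuming the class above is instantiated as nlp and the LTP models loaded successfully; the token list and the tag values in the comments are illustrative:

words = ["小明", "就讀", "北京大學"]
postags = list(nlp.postagger.postag(words))               # POS tags, e.g. ['nh', 'v', 'ni']
netags = list(nlp.recognizer.recognize(words, postags))   # NE tags, e.g. ['S-Nh', 'O', 'S-Ni']
arcs = nlp.parser.parse(words, postags)                   # dependency arcs (head, relation)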

Example 4: load_dict

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
# Also needed here: import csv
def load_dict():
    dics = csv.reader(open("DICT_NOW.csv", 'r', encoding='utf8'))
    flag = 0
    fuhao = [';', '。', '?', '?', '!', '!', ';']  # sentence-final punctuation (unused below)

    for row in dics:
        if flag == 0:  # skip the header row
            flag = 1
            continue
        if len(row) == 2:
            jieba.add_word(row[0].strip(), tag=row[1].strip()) 
Author: nlpdz, Project: Medical-Named-Entity-Rec-Based-on-Dilated-CNN, Lines: 13, Source: xxx.py
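
DICT_NOW.csv is a project file whose contents are not shown; a guess at its layout (an assumption), with a header row followed by word,tag pairs:

# Hypothetical DICT_NOW.csv (the real words and tags are not shown in the snippet):
#
#   word,tag
#   青霉素,DRUG
#
# Each data row then amounts to:
jieba.add_word("青霉素", tag="DRUG")  # column 0 is the word, column 1 its tag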

Example 5: prepare

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def prepare(self):
        self.prepared = True
        for type0 in self.entity_types:
            tag0 = "n"  # default: generic noun
            if "人名" in type0:
                tag0 = "nr"  # person name
            elif "地名" in type0:
                tag0 = "ns"  # place name
            elif "機構" in type0:
                tag0 = "nt"  # organization name
            elif "其他專名" in type0:
                tag0 = "nz"  # other proper noun
            jieba.add_word(type0, freq = 10000, tag=tag0) 
Author: blmoistawinde, Project: HarvestText, Lines: 15, Source: harvesttext.py
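
The tag argument pays off when segmenting with jieba.posseg, which returns word/flag pairs; a minimal sketch with an illustrative entity:

import jieba
import jieba.posseg as pseg

jieba.add_word("哈爾濱工業大學", freq=10000, tag="nt")  # register as an organization name
print(pseg.lcut("他畢業於哈爾濱工業大學"))              # the entity now carries the nt flag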

Example 6: cut

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def cut(text,custom_words=['FLOAT','TIME','DATE','EOS']):
    jieba.enable_parallel(32)  # parallel segmentation across 32 worker processes
    for word in custom_words:
        jieba.add_word(word)  # keep the placeholder tokens from being split
    words=jieba.lcut(text)
    return words 
Author: QuantumLiu, Project: Neural-Headline-Generator-CN, Lines: 8, Source: data_preprocess.py
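
Note that jieba.enable_parallel spawns worker processes and is only available on POSIX systems; a sketch of guarding the call, with a placeholder corpus path:

import os
import jieba

if os.name == "posix":
    jieba.enable_parallel(4)  # worker count should match the machine, not a fixed 32
words = jieba.lcut(open("corpus.txt", encoding="utf-8").read())
if os.name == "posix":
    jieba.disable_parallel()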

Example 7: load_word_dict

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def load_word_dict(self) -> None:
        if self.external_word_dict:
            for word in self.external_word_dict:
                jieba.add_word(word, freq=1000000)  # a very high freq keeps these words unsplit
Author: boat-group, Project: fancy-nlp, Lines: 6, Source: spm_preprocessor.py

Example 8: load_word_dict

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def load_word_dict(self):
        if self.external_word_dict:
            for word in self.external_word_dict:
                jieba.add_word(word, freq=1000000) 
Author: boat-group, Project: fancy-nlp, Lines: 6, Source: text_classification_preprocessor.py

Example 9: load_word_dict

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def load_word_dict(self):
        """Load external word dictionary in jieba"""
        if self.external_word_dict:
            for word in self.external_word_dict:
                jieba.add_word(word, freq=1000000) 
Author: boat-group, Project: fancy-nlp, Lines: 7, Source: ner_preprocessor.py
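
The freq=1000000 used by these three loaders biases jieba's path scoring so the whole word beats any competing split; a quick way to see the effect:

import jieba

jieba.add_word("深度學習", freq=1000000)  # heavily weighted custom word
print(jieba.lcut("深度學習模型"))          # "深度學習" comes out as one token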

Example 10: load_word2jieba

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def load_word2jieba(self):
        vocab_list = load_pkl(self.vocab_list)
        if vocab_list != []:
            print("Total number of loaded words: ", len(vocab_list))
            for word in vocab_list:
                jieba.add_word(word) 
Author: apachecn, Project: AiLearning, Lines: 8, Source: text_Emotion.py
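
load_pkl is a project helper rather than part of jieba; a minimal stand-in (an assumption about its behavior) could be:

import pickle

def load_pkl(path):
    """Deserialize a pickled object; here it is expected to be a list of words."""
    with open(path, "rb") as f:
        return pickle.load(f)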

Example 11: load_data

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def load_data(self, word_index, vocab_list, test_size=0.25):
        STOPWORDS = ["-", "\t", "\n", ".", "。", ",", ",", ";", "!", "!", "?", "?", "%"]
        if vocab_list != []:
            for word in vocab_list:
                jieba.add_word(word)

        def func(line):
            # Decompose texts like ['1, 2, 3', '1, 2, .., n'] into: [[1, 2, 3], [1, 2, .., n]]
            words = [word for word in jieba.cut(str(line), cut_all=False) if word not in STOPWORDS]
            indexs = [word_index.get(word, 0) for word in words]
            return indexs

        df = pd.read_excel(self.data_file, header=0, error_bad_lines=False, encoding="utf_8_sig")
        x = df["comment"].apply(lambda line: func(line)).tolist()
        x = pad_sequences(x, maxlen=self.MAX_SEQUENCE_LENGTH)
        y = df["label"].tolist()
        # Generate labels (natural numbers 0, 1, 2, ...) ordered by value
        """
        In [7]: to_categorical(np.asarray([1,1,0,1,3]))
        Out[7]:
        array([[0., 1., 0., 0.],
            [0., 1., 0., 0.],
            [1., 0., 0., 0.],
            [0., 1., 0., 0.],
            [0., 0., 0., 1.]], dtype=float32)
        """
        y = to_categorical(np.asarray(y))
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=10000)
        return (x_train, y_train), (x_test, y_test) 
Author: apachecn, Project: AiLearning, Lines: 31, Source: text_Emotion.py

Example 12: TaibaCustomSetting

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
# (This example actually calls Taiba.add_word; Taiba exposes a jieba-style interface.)
def TaibaCustomSetting(self, usr_dict):

        with open(usr_dict, 'r', encoding='utf-8') as dic:
            for word in dic:
                Taiba.add_word(word.strip('\n'))  # one user word per line
Author: zake7749, Project: PTT-Chat-Generator, Lines: 7, Source: matcher.py

Example 13: sentence_segmentation

# Required module: import jieba [as alias]
# Or: from jieba import add_word [as alias]
def sentence_segmentation(self, sentence, entity1, entity2):
        jieba.add_word(entity1, freq=999999)  # a very high freq forces each entity to stay whole
        jieba.add_word(entity2, freq=999999)

        seglist = list(jieba.cut(sentence, cut_all=False, HMM=False))
        jieba.del_word(entity1)  # restore the default dictionary afterwards
        jieba.del_word(entity2)
        return seglist 
Author: qq547276542, Project: Agriculture_KnowledgeGraph, Lines: 10, Source: data_loader.py
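
This force-then-restore pattern is useful whenever known entities must survive segmentation intact; a standalone sketch with illustrative strings:

import jieba

sentence = "蘋果樹苗的種植方法"
jieba.add_word("蘋果樹苗", freq=999999)  # force the entity to stay whole
print(jieba.lcut(sentence, HMM=False))   # the entity comes out as one token
jieba.del_word("蘋果樹苗")                # restore the default dictionary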


Note: The jieba.add_word examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and distribution or use should follow the corresponding project's license. Please do not republish without permission.