當前位置: 首頁>>代碼示例>>Python>>正文


Python stem.WordNetLemmatizer方法代碼示例

本文整理匯總了Python中nltk.stem.WordNetLemmatizer方法的典型用法代碼示例。如果您正苦於以下問題:Python stem.WordNetLemmatizer方法的具體用法?Python stem.WordNetLemmatizer怎麼用?Python stem.WordNetLemmatizer使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk.stem的用法示例。


在下文中一共展示了stem.WordNetLemmatizer方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: clean_text

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def clean_text(text):
        """Clean ``text`` and return its lemmatized tokens joined by spaces.

        The input is passed through ``remove_short`` and then ``clean_str``,
        split with ``word_tokenize``, stripped of every token found in the
        local ignore list, and each remaining token is run through
        ``WordNetLemmatizer.lemmatize`` (default part of speech).
        """
        # NOTE(review): the '</s> ' entry carries a trailing space, exactly as
        # in the original list; confirm whether the tokenizer can emit it.
        ignored_tokens = ('!', ',', '.', '?', '-s', '-ly', '</s> ', 's')
        lemmatizer = WordNetLemmatizer()

        cleaned = clean_str(remove_short(text))

        lemmas = []
        for token in word_tokenize(cleaned):
            if token in ignored_tokens:
                continue
            lemmas.append(lemmatizer.lemmatize(token))

        return ' '.join(lemmas)
開發者ID:HuangLianzhe,項目名稱:TextLevelGCN,代碼行數:18,代碼來源:pre_processing.py

示例2: real_word

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def real_word(self, word, LEMMATIZATION_flag=True):
        '''
        Reduce ``word`` to its "real" form.

        Only the characters matched by the pattern below are kept.  When
        LEMMATIZATION_flag is true, the result is then looked up in
        ``self.fix_dic`` (modes 'list' and 'both') and/or lemmatized with
        WordNetLemmatizer (modes 'NLTK' and 'both'), depending on
        ``self.config['LEMMATIZATION_MODE']``.  A failed ``fix_dic`` lookup
        is logged at debug level and the word is left as it was.
        '''
        # NOTE(review): besides letters and the two quote characters, this
        # character class also matches literal commas -- confirm intended.
        kept_chars = re.findall('[a-z,A-Z,\',‘]', word)
        candidate = ''.join(kept_chars)

        if LEMMATIZATION_flag:
            mode = self.config['LEMMATIZATION_MODE']
            if mode in ('list', 'both'):
                try:
                    candidate = self.fix_dic[candidate]
                except Exception as err:
                    logger.debug(err)
            if mode in ('NLTK', 'both'):
                candidate = WordNetLemmatizer().lemmatize(candidate)

        logger.debug(word+'-->'+candidate)
        return candidate
開發者ID:Steven-AA,項目名稱:find-all-the-new-words,代碼行數:20,代碼來源:Article.py

示例3: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self):
        """Initialise the reader state and trigger construction.

        Creates empty ``entity_dict`` / ``abbr_dict`` mappings, a
        ``WordNetLemmatizer`` stored as ``self.wn``, records the tokenized
        data path, and finally calls ``self._construct`` with the train, dev,
        test and entity values taken from ``cfg``.
        """
        super().__init__()

        self.entity_dict, self.abbr_dict = {}, {}
        self.wn = WordNetLemmatizer()
        self.tokenized_data_path = './data/kvret/'

        self._construct(cfg.train, cfg.dev, cfg.test, cfg.entity)
開發者ID:AuCson,項目名稱:SEDST,代碼行數:13,代碼來源:reader.py

示例4: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self):
        """Initialise the reader state and trigger construction.

        Creates empty ``entity_dict`` / ``abbr_dict`` / ``db`` mappings, a
        ``WordNetLemmatizer`` stored as ``self.wn``, records the tokenized
        data path, and finally calls ``self._construct`` with the train, dev,
        test and entity values taken from ``cfg``.
        """
        super().__init__()

        self.entity_dict, self.abbr_dict = {}, {}
        self.wn = WordNetLemmatizer()
        self.db = {}
        self.tokenized_data_path = './data/kvret/'

        self._construct(cfg.train, cfg.dev, cfg.test, cfg.entity)
開發者ID:ConvLab,項目名稱:ConvLab,代碼行數:13,代碼來源:reader.py

示例5: __lemmatize

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __lemmatize(self, lemma):
        """
        Internal method that applies nltk.stem.WordNetLemmatizer to the
        (word, pos) pair ``lemma``.

        The word is lemmatized only when pos is one of 'a', 'n', 'r', 'v';
        for any other pos the word is left untouched.  Always returns a
        (string, pos) tuple.
        """
        word, pos = lemma
        if pos not in ('a', 'n', 'r', 'v'):
            return (word, pos)
        return (WordNetLemmatizer().lemmatize(word, pos), pos)

    ######################################################################    
    # POSITIONING. 
開發者ID:cgpotts,項目名稱:pdtb2,代碼行數:14,代碼來源:pdtb2.py

示例6: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self):
		"""Create a WordNetLemmatizer and keep it on the instance as ``self.wnl``."""
		self.wnl = WordNetLemmatizer() 
開發者ID:blei-lab,項目名稱:causal-text-embeddings,代碼行數:4,代碼來源:helpers.py

示例7: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self):
        """Create a WordNetLemmatizer and keep it on the instance as ``self.wnl``."""
        self.wnl = WordNetLemmatizer() 
開發者ID:allenai,項目名稱:OpenBookQA,代碼行數:4,代碼來源:rank_knowledge_for_mc_qa.py

示例8: __wn_lemmatize

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __wn_lemmatize(self, lemma):
        """
        Lemmatize the (string, pos) pair ``lemma`` with
        nltk.stem.WordNetLemmatizer.  Always returns a (string, pos) pair.

        When pos is one of 'a', 'n', 'r', 'v' it is passed on to the
        lemmatizer; any other pos is ignored and the lemmatizer is called
        with the word alone.
        """
        word, pos = lemma
        lemmatizer = WordNetLemmatizer()
        # Forward the tag only when it is one of the four accepted letters.
        pos_args = (pos,) if pos in ('a', 'n', 'r', 'v') else ()
        return (lemmatizer.lemmatize(word, *pos_args), pos)
開發者ID:cgpotts,項目名稱:swda,代碼行數:15,代碼來源:swda.py

示例9: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self):
        """
        Initialize members:
        question_dist - generalized-question distribution of the assigned
                        extraction location, held as a two-level mapping
                        whose missing inner entries start at 0.
        lmtzr         - a WordNetLemmatizer instance.
        """
        def _zero_counts():
            # Inner level: any unseen key reads as 0.
            return defaultdict(int)

        self.question_dist = defaultdict(_zero_counts)
        self.lmtzr = WordNetLemmatizer()
開發者ID:gabrielStanovsky,項目名稱:oie-benchmark,代碼行數:10,代碼來源:analyze.py

示例10: preprocessing

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def preprocessing(text):
    """Return ``text`` as a space-joined string of normalised tokens.

    Pipeline, in order:
      1. every ``string.punctuation`` character becomes a space and runs of
         whitespace are collapsed;
      2. the text is split with ``nltk.sent_tokenize`` / ``nltk.word_tokenize``;
      3. tokens are lower-cased, then English stop words and tokens shorter
         than three characters are dropped;
      4. the remaining tokens are stemmed with ``PorterStemmer``;
      5. the stems are POS-tagged and lemmatized as verbs when the tag is a
         verb tag, and as nouns for every other tag.

    :param text: raw input string
    :return: the processed tokens joined by single spaces
    """
    no_punct = "".join(" " if ch in string.punctuation else ch for ch in text)
    normalized = " ".join(no_punct.split())

    raw_tokens = [
        word
        for sent in nltk.sent_tokenize(normalized)
        for word in nltk.word_tokenize(sent)
    ]

    # A set gives constant-time membership tests; the stop-word list would be
    # scanned linearly for every token otherwise.
    stop_set = set(stopwords.words('english'))
    tokens = [
        word
        for word in (raw.lower() for raw in raw_tokens)
        if len(word) >= 3 and word not in stop_set
    ]

    stemmer = PorterStemmer()
    stems = [stemmer.stem(word) for word in tokens]

    # NOTE(review): tagging runs on lower-cased, already-stemmed tokens;
    # confirm that this ordering is intended.
    tagged_corpus = pos_tag(stems)

    verb_tags = frozenset(['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'])
    lemmatizer = WordNetLemmatizer()

    # Noun tags and all unrecognised tags are both lemmatized as nouns, so
    # only the verb case needs to be singled out.
    lemmas = [
        lemmatizer.lemmatize(token, 'v' if tag in verb_tags else 'n')
        for token, tag in tagged_corpus
    ]

    return " ".join(lemmas)
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:36,代碼來源:9.5 Skipgram_Keras.py

示例11: preprocessing

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def preprocessing(text):
    """Return ``text`` as a space-joined string of normalised tokens.

    Pipeline, in order:
      1. every ``string.punctuation`` character becomes a space and runs of
         whitespace are collapsed;
      2. the text is split with ``nltk.sent_tokenize`` / ``nltk.word_tokenize``;
      3. tokens are lower-cased, then English stop words and tokens shorter
         than three characters are dropped;
      4. the remaining tokens are stemmed with ``PorterStemmer`` on a
         best-effort basis: if stemming raises, all tokens are kept unstemmed;
      5. the tokens are POS-tagged and lemmatized as verbs when the tag is a
         verb tag, and as nouns for every other tag.

    :param text: raw input string
    :return: the processed tokens joined by single spaces
    """
    no_punct = "".join(" " if ch in string.punctuation else ch for ch in text)
    normalized = " ".join(no_punct.split())

    raw_tokens = [
        word
        for sent in nltk.sent_tokenize(normalized)
        for word in nltk.word_tokenize(sent)
    ]

    # A set gives constant-time membership tests; the stop-word list would be
    # scanned linearly for every token otherwise.
    stop_set = set(stopwords.words('english'))
    tokens = [
        word
        for word in (raw.lower() for raw in raw_tokens)
        if len(word) >= 3 and word not in stop_set
    ]

    stemmer = PorterStemmer()
    try:
        stems = [stemmer.stem(word) for word in tokens]
    except Exception:
        # Stemming stays best-effort (fall back to the unstemmed tokens), but
        # only for ordinary errors: a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        stems = tokens

    # NOTE(review): tagging runs on lower-cased, usually stemmed tokens;
    # confirm that this ordering is intended.
    tagged_corpus = pos_tag(stems)

    verb_tags = frozenset(['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'])
    lemmatizer = WordNetLemmatizer()

    # Noun tags and all unrecognised tags are both lemmatized as nouns, so
    # only the verb case needs to be singled out.
    lemmas = [
        lemmatizer.lemmatize(token, 'v' if tag in verb_tags else 'n')
        for token, tag in tagged_corpus
    ]

    return " ".join(lemmas)
開發者ID:PacktPublishing,項目名稱:Natural-Language-Processing-with-Python-Cookbook,代碼行數:40,代碼來源:9.2 Email_Classification.py

示例12: stem_corpus

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def stem_corpus():
    """Lemmatize the MR training corpus and save it as shuffled labelled lines.

    Reads ``data/mr/text_train.txt`` (split on newlines into documents) and
    ``data/mr/label_train.txt`` (one label per line: ``1`` becomes ``pos``,
    ``0`` becomes ``neg``, empty lines are skipped).  Each document goes
    through ``clean_str``, ``remove_short`` and ``word_tokenize``, and every
    token is lemmatized.  Labels and documents are paired positionally with
    ``zip`` (which stops at the shorter of the two), joined with a tab,
    shuffled, and written to ``data/mr/mr-train-stemmed.txt``.

    Raises:
        ValueError: if a non-empty label line is neither ``'0'`` nor ``'1'``.
    """
    lemmatizer = WordNetLemmatizer()

    with open('data/mr/text_train.txt') as text_file:
        raw_text = text_file.read()

    with open('data/mr/label_train.txt') as label_file:
        raw_labels = label_file.read()

    label_names = {'1': 'pos', '0': 'neg'}
    labels = []
    for raw_label in raw_labels.split('\n'):
        if raw_label in label_names:
            labels.append(label_names[raw_label])
        elif raw_label:
            # Anything that is neither a known label nor an empty line.
            raise ValueError(raw_label)

    documents = [clean_str(line) for line in raw_text.split('\n')]
    documents = [remove_short(doc) for doc in documents]
    token_lists = [word_tokenize(doc) for doc in documents]

    lemmatized = [
        ' '.join(lemmatizer.lemmatize(token) for token in tokens)
        for tokens in token_lists
    ]

    rows = ['\t'.join(pair) for pair in zip(labels, lemmatized)]
    random.shuffle(rows)

    with open('data/mr/mr-train-stemmed.txt', 'w') as out_file:
        out_file.write('\n'.join(rows))
開發者ID:HuangLianzhe,項目名稱:TextLevelGCN,代碼行數:39,代碼來源:pre_processing.py

示例13: _phrase_stem

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def _phrase_stem(cls, phrase):
        """Return ``phrase`` with each whitespace-separated term lemmatized as a noun.

        Terms are re-joined with single spaces.
        """
        lemmatizer = WordNetLemmatizer()
        return ' '.join(
            lemmatizer.lemmatize(token, 'n') for token in phrase.split()
        )
開發者ID:thunlp,項目名稱:EntityDuetNeuralRanking,代碼行數:7,代碼來源:cmns.py

示例14: extract_experience

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def extract_experience(resume_text):
    '''
    Helper function to extract experience from resume text.

    The text is tokenized, tokens that are English stop words (either as
    written or after lemmatization) are dropped, and the remaining tokens are
    POS-tagged.  Runs of one or more NNP tags are chunked; chunks with at
    least two tokens are joined into phrases.  For each phrase containing
    'experience' (case-insensitive), the text following that word is
    collected.

    :param resume_text: Plain resume text
    :return: list of experience
    '''
    lemmatizer = WordNetLemmatizer()
    english_stop_words = set(stopwords.words('english'))

    # Keep a token only if neither it nor its lemma is a stop word; the
    # original (un-lemmatized) token is what gets kept.
    kept_tokens = []
    for token in nltk.word_tokenize(resume_text):
        if token in english_stop_words:
            continue
        if lemmatizer.lemmatize(token) in english_stop_words:
            continue
        kept_tokens.append(token)
    tagged_tokens = nltk.pos_tag(kept_tokens)

    # Chunk consecutive proper nouns under the label 'P'.
    chunker = nltk.RegexpParser('P: {<NNP>+}')
    parse_tree = chunker.parse(tagged_tokens)

    phrases = []
    for chunk in parse_tree.subtrees(filter=lambda node: node.label() == 'P'):
        leaves = chunk.leaves()
        # Chunks with fewer than two tokens contribute an empty string,
        # which is discarded below.
        words = [leaf[0] for leaf in leaves] if len(leaves) >= 2 else []
        phrases.append(" ".join(words))

    # Collect whatever follows the word 'experience' in each phrase.
    keyword = 'experience'
    found = []
    for phrase in phrases:
        lowered = phrase.lower()
        if phrase and keyword in lowered:
            found.append(phrase[lowered.index(keyword) + len(keyword):])
    return found
開發者ID:OmkarPathak,項目名稱:ResumeParser,代碼行數:34,代碼來源:utils.py

示例15: __init__

# 需要導入模塊: from nltk import stem [as 別名]
# 或者: from nltk.stem import WordNetLemmatizer [as 別名]
def __init__(self, stemmer='porter', tokenize=True, case_sensitive=False,
                 *args, **kwargs):
        """Resolve the stemmer and store the configuration on the instance.

        ``stemmer`` is either a key of ``self._stemmers`` -- in which case the
        attribute of ``stem`` named by that entry is instantiated with
        ``*args`` / ``**kwargs`` -- or an existing ``stem.StemmerI`` /
        ``stem.WordNetLemmatizer`` instance, which is used as given.
        ``tokenize`` and ``case_sensitive`` are stored unchanged.

        Raises:
            ValueError: if ``stemmer`` is a string that is not a key of
                ``self._stemmers``, or is neither a string nor an instance of
                one of the accepted classes.
        """
        if not isinstance(stemmer, str):
            accepted = (stem.StemmerI, stem.WordNetLemmatizer)
            if not isinstance(stemmer, accepted):
                raise ValueError("stemmer must be either a valid string, or an "
                                 "instance of class StemmerI.")
        elif stemmer in self._stemmers:
            stemmer_class = getattr(stem, self._stemmers[stemmer])
            stemmer = stemmer_class(*args, **kwargs)
        else:
            valid = list(self._stemmers)
            raise ValueError("Invalid stemmer '%s'; please use one of %s."
                             % (stemmer, valid))

        self.stemmer = stemmer
        self.tokenize = tokenize
        self.case_sensitive = case_sensitive
        # The parent initialiser runs last, after the attributes are set.
        super().__init__()
開發者ID:tyarkoni,項目名稱:pliers,代碼行數:17,代碼來源:text.py


注:本文中的nltk.stem.WordNetLemmatizer方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。