当前位置: 首页>>代码示例>>Python>>正文


Python stem.WordNetLemmatizer方法代码示例

本文整理汇总了Python中nltk.stem.WordNetLemmatizer方法的典型用法代码示例。如果您正苦于以下问题:Python stem.WordNetLemmatizer方法的具体用法?Python stem.WordNetLemmatizer怎么用?Python stem.WordNetLemmatizer使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.stem的用法示例。


在下文中一共展示了stem.WordNetLemmatizer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: clean_text

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def clean_text(text):
        """Turn raw text into a space-joined string of lemmatized tokens.

        The input is passed through ``remove_short`` and then ``clean_str``
        (helpers defined elsewhere), tokenized with ``word_tokenize``, stripped
        of a small fixed list of unwanted tokens, and every remaining token is
        run through ``WordNetLemmatizer.lemmatize`` without a POS tag.

        :param text: raw input string
        :return: the surviving, lemmatized tokens joined by single spaces
        """
        # Only this fixed token list is filtered out; the NLTK English
        # stop-word list is not applied here.
        ignored_tokens = ['!', ',', '.', '?', '-s', '-ly', '</s> ', 's']
        lemmatizer = WordNetLemmatizer()

        tokens = word_tokenize(clean_str(remove_short(text)))

        lemmas = [
            lemmatizer.lemmatize(token)
            for token in tokens
            if token not in ignored_tokens
        ]
        return ' '.join(lemmas)
开发者ID:HuangLianzhe,项目名称:TextLevelGCN,代码行数:18,代码来源:pre_processing.py

示例2: real_word

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def real_word(self, word, LEMMATIZATION_flag=True):
        '''
        Reduce a raw token to its "real" word and optionally lemmatize it.

        Only ASCII letters, the apostrophe (') and the left single quotation
        mark (‘) are kept; every other character, including commas, is
        dropped.  Case is preserved.

        When LEMMATIZATION_flag is true, self.config['LEMMATIZATION_MODE']
        selects the lemmatization steps:
          * 'list' or 'both': replace the word with self.fix_dic[word] when
            such an entry exists (a missing entry leaves the word unchanged);
          * 'NLTK' or 'both': run the word through WordNetLemmatizer.
        With 'both', the dictionary step runs first and its result is then
        passed to the NLTK step.

        :param word: raw token text
        :param LEMMATIZATION_flag: set to False to skip every lemmatization
            step (self.config is not read in that case)
        :return: the cleaned, possibly lemmatized, word
        '''
        # Inside a character class a comma is a literal character, so the
        # comma-separated form '[a-z,A-Z,...]' also matched ',' and left
        # commas in the result.  List the allowed characters without commas.
        kept_chars = re.findall("[a-zA-Z'‘]", word)
        real_word = ''.join(kept_chars)

        # Read the mode only when lemmatization is requested.
        mode = self.config['LEMMATIZATION_MODE'] if LEMMATIZATION_flag else None

        if mode in ('list', 'both'):
            try:
                real_word = self.fix_dic[real_word]
            except KeyError as e:
                # No manual correction for this word: keep it as is.
                logger.debug(e)
        if mode in ('NLTK', 'both'):
            real_word = WordNetLemmatizer().lemmatize(real_word)

        logger.debug(word+'-->'+real_word)
        return real_word
开发者ID:Steven-AA,项目名称:find-all-the-new-words,代码行数:20,代码来源:Article.py

示例3: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self):
        """Create empty lookup tables and a lemmatizer, then build the reader.

        After the attributes are set, ``_construct`` is called with the
        ``train``, ``dev``, ``test`` and ``entity`` attributes of the
        module-level ``cfg`` object.
        """
        super().__init__()

        self.entity_dict, self.abbr_dict = {}, {}
        self.wn = WordNetLemmatizer()
        self.tokenized_data_path = './data/kvret/'

        self._construct(cfg.train, cfg.dev, cfg.test, cfg.entity)
开发者ID:AuCson,项目名称:SEDST,代码行数:13,代码来源:reader.py

示例4: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self):
        """Create empty lookup tables, a lemmatizer and an empty ``db`` dict,
        then build the reader.

        After the attributes are set, ``_construct`` is called with the
        ``train``, ``dev``, ``test`` and ``entity`` attributes of the
        module-level ``cfg`` object.
        """
        super().__init__()

        self.entity_dict, self.abbr_dict, self.db = {}, {}, {}
        self.wn = WordNetLemmatizer()
        self.tokenized_data_path = './data/kvret/'

        self._construct(cfg.train, cfg.dev, cfg.test, cfg.entity)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:13,代码来源:reader.py

示例5: __lemmatize

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __lemmatize(self, lemma):
        """
        Internal method that applies nltk.stem.WordNetLemmatizer to the
        (word, pos) pair lemma.

        The word is lemmatized only when pos is one of 'a', 'n', 'r' or 'v';
        with any other tag the pair is returned unchanged.  Always returns a
        (string, tag) tuple.
        """
        word, pos = lemma
        if pos not in ('a', 'n', 'r', 'v'):
            # Tag is not one of the four accepted values: leave the word alone.
            return (word, pos)
        return (WordNetLemmatizer().lemmatize(word, pos), pos)

    ######################################################################    
    # POSITIONING. 
开发者ID:cgpotts,项目名称:pdtb2,代码行数:14,代码来源:pdtb2.py

示例6: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self):
        """Create the WordNetLemmatizer instance and store it as ``self.wnl``."""
        lemmatizer = WordNetLemmatizer()
        self.wnl = lemmatizer
开发者ID:blei-lab,项目名称:causal-text-embeddings,代码行数:4,代码来源:helpers.py

示例7: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self):
        """Create the WordNetLemmatizer instance and store it as ``self.wnl``."""
        lemmatizer = WordNetLemmatizer()
        self.wnl = lemmatizer
开发者ID:allenai,项目名称:OpenBookQA,代码行数:4,代码来源:rank_knowledge_for_mc_qa.py

示例8: __wn_lemmatize

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __wn_lemmatize(self, lemma):
        """
        Lemmatize the (string, pos) pair lemma with nltk's
        WordNetLemmatizer.  Always returns a (string, pos) pair.

        When the tag is one of 'a', 'n', 'r' or 'v' it is passed to the
        lemmatizer; any other tag is ignored for lemmatization (the
        lemmatizer is called without a POS argument) but is still returned
        unchanged in the result.
        """
        word, pos = lemma
        lemmatizer = WordNetLemmatizer()
        tag_is_usable = pos in ('a', 'n', 'r', 'v')
        lemmatized = (
            lemmatizer.lemmatize(word, pos)
            if tag_is_usable
            else lemmatizer.lemmatize(word)
        )
        return (lemmatized, pos)
开发者ID:cgpotts,项目名称:swda,代码行数:15,代码来源:swda.py

示例9: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self):
        """
        Initialize members:
        question_dist - generalized-question distribution of the assigned extraction
                        location; a two-level mapping whose missing entries
                        default to 0.
        lmtzr - WordNetLemmatizer instance.
        """
        # Inner factory ``int`` yields 0 for missing keys.
        self.question_dist = defaultdict(lambda: defaultdict(int))
        self.lmtzr = WordNetLemmatizer()
开发者ID:gabrielStanovsky,项目名称:oie-benchmark,代码行数:10,代码来源:analyze.py

示例10: preprocessing

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def preprocessing(text):
    """Clean, stem and lemmatize ``text``; return the tokens joined by spaces.

    Steps, in order:
      1. replace every ``string.punctuation`` character with a space and
         collapse runs of whitespace;
      2. sentence- and word-tokenize with NLTK and lower-case each token;
      3. drop English stop words and tokens shorter than 3 characters;
      4. Porter-stem each remaining token;
      5. POS-tag the stems and lemmatize them: tokens with a verb tag are
         lemmatized as 'v', everything else as 'n'.

    :param text: raw input string
    :return: processed tokens joined by single spaces
    """
    no_punct = "".join(" " if ch in string.punctuation else ch for ch in text)
    text2 = " ".join(no_punct.split())

    tokens = [word.lower()
              for sent in nltk.sent_tokenize(text2)
              for word in nltk.word_tokenize(sent)]

    # A set gives constant-time membership tests; the plain list would be
    # scanned linearly for every token.
    stopwds = set(stopwords.words('english'))
    tokens = [token for token in tokens
              if token not in stopwds and len(token) >= 3]

    stemmer = PorterStemmer()
    tokens = [stemmer.stem(word) for word in tokens]

    tagged_corpus = pos_tag(tokens)

    verb_tags = {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}
    lemmatizer = WordNetLemmatizer()

    def prat_lemmatize(token, tag):
        # Noun tags and all remaining tags are both lemmatized as nouns, so
        # only the verb case needs to be distinguished.
        return lemmatizer.lemmatize(token, 'v' if tag in verb_tags else 'n')

    pre_proc_text = " ".join(prat_lemmatize(token, tag)
                             for token, tag in tagged_corpus)

    return pre_proc_text
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:36,代码来源:9.5 Skipgram_Keras.py

示例11: preprocessing

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def preprocessing(text):
    """Clean, stem and lemmatize ``text``; return the tokens joined by spaces.

    Steps, in order:
      1. replace every ``string.punctuation`` character with a space and
         collapse runs of whitespace;
      2. sentence- and word-tokenize with NLTK and lower-case each token;
      3. drop English stop words and tokens shorter than 3 characters;
      4. Porter-stem the tokens (best effort: if stemming fails, the
         unstemmed tokens are used);
      5. POS-tag the tokens and lemmatize them: tokens with a verb tag are
         lemmatized as 'v', everything else as 'n'.

    :param text: raw input string
    :return: processed tokens joined by single spaces
    """
    no_punct = "".join(" " if ch in string.punctuation else ch for ch in text)
    text2 = " ".join(no_punct.split())

    tokens = [word.lower()
              for sent in nltk.sent_tokenize(text2)
              for word in nltk.word_tokenize(sent)]

    # A set gives constant-time membership tests; the plain list would be
    # scanned linearly for every token.
    stopwds = set(stopwords.words('english'))
    tokens = [token for token in tokens
              if token not in stopwds and len(token) >= 3]

    stemmer = PorterStemmer()
    try:
        stemmed = [stemmer.stem(word) for word in tokens]
    except Exception:
        # Deliberate best effort: if the stemmer fails on any token, fall
        # back to the unstemmed tokens for the whole text.  ``Exception``
        # (rather than a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        stemmed = tokens
    tokens = stemmed

    tagged_corpus = pos_tag(tokens)

    verb_tags = {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}
    lemmatizer = WordNetLemmatizer()

    def prat_lemmatize(token, tag):
        # Noun tags and all remaining tags are both lemmatized as nouns, so
        # only the verb case needs to be distinguished.
        return lemmatizer.lemmatize(token, 'v' if tag in verb_tags else 'n')

    pre_proc_text = " ".join(prat_lemmatize(token, tag)
                             for token, tag in tagged_corpus)

    return pre_proc_text
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:40,代码来源:9.2 Email_Classification.py

示例12: stem_corpus

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def stem_corpus():
    """Lemmatize the MR training corpus and write it out as shuffled lines.

    Reads ``data/mr/text_train.txt`` (one document per line) and
    ``data/mr/label_train.txt`` (one label per line).  Label '1' becomes
    'pos' and '0' becomes 'neg'; empty label lines are skipped.  Each
    document is passed through ``clean_str`` and ``remove_short``, tokenized,
    and lemmatized token by token.  Labels and documents are paired
    positionally (pairing stops at the shorter of the two sequences), joined
    with a tab, shuffled, and written to ``data/mr/mr-train-stemmed.txt``.

    :raises ValueError: if a non-empty label line is neither '0' nor '1'
    """
    lemmatizer = WordNetLemmatizer()

    with open('data/mr/text_train.txt') as text_file:
        raw_text = text_file.read()

    with open('data/mr/label_train.txt') as label_file:
        raw_labels = label_file.read()

    label_names = {'1': 'pos', '0': 'neg'}
    labels = []
    for raw_label in raw_labels.split('\n'):
        if not raw_label:
            # Blank lines (e.g. after a trailing newline) carry no label.
            continue
        if raw_label not in label_names:
            raise ValueError(raw_label)
        labels.append(label_names[raw_label])

    # Each helper is applied to the whole corpus before the next one runs.
    corpus = list(map(clean_str, raw_text.split('\n')))
    corpus = list(map(remove_short, corpus))
    tokenized_corpus = list(map(word_tokenize, corpus))

    documents = [
        ' '.join(lemmatizer.lemmatize(word) for word in tokens)
        for tokens in tokenized_corpus
    ]

    results = ['\t'.join(pair) for pair in zip(labels, documents)]
    random.shuffle(results)

    with open('data/mr/mr-train-stemmed.txt', 'w') as out_file:
        out_file.write('\n'.join(results))
开发者ID:HuangLianzhe,项目名称:TextLevelGCN,代码行数:39,代码来源:pre_processing.py

示例13: _phrase_stem

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def _phrase_stem(cls, phrase):
        """Lemmatize each whitespace-separated term of ``phrase`` as a noun.

        :param phrase: input string; split on whitespace into terms
        :return: the lemmatized terms joined by single spaces
        """
        lemmatizer = WordNetLemmatizer()
        return ' '.join(
            lemmatizer.lemmatize(term, 'n') for term in phrase.split()
        )
开发者ID:thunlp,项目名称:EntityDuetNeuralRanking,代码行数:7,代码来源:cmns.py

示例14: extract_experience

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def extract_experience(resume_text):
    '''
    Helper function to extract experience from resume text.

    The text is word-tokenized; tokens that are English stop words, or
    whose lemma is a stop word, are discarded (the kept tokens themselves
    are not lemmatized).  The remaining tokens are POS-tagged and runs of
    one or more NNP tags are chunked as 'P'.  Chunks with at least two
    tokens are joined into phrases, and for every phrase containing
    'experience' (case-insensitive) the text following that word is
    collected.

    :param resume_text: Plain resume text
    :return: list of experience
    '''
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(resume_text)

    # Keep a token only if neither it nor its lemma is a stop word.
    kept_tokens = [
        token for token in tokens
        if token not in stop_words
        and lemmatizer.lemmatize(token) not in stop_words
    ]
    tagged = nltk.pos_tag(kept_tokens)

    # Chunk consecutive proper nouns.
    chunker = nltk.RegexpParser('P: {<NNP>+}')
    tree = chunker.parse(tagged)

    phrases = []
    for chunk in tree.subtrees(filter=lambda node: node.label() == 'P'):
        leaves = chunk.leaves()
        # Single-token chunks contribute an empty phrase.
        if len(leaves) >= 2:
            phrases.append(" ".join(leaf[0] for leaf in leaves))
        else:
            phrases.append("")

    # Collect whatever follows the word 'experience' in each phrase.
    keyword = 'experience'
    experiences = []
    for phrase in phrases:
        position = phrase.lower().find(keyword)
        if position != -1:
            experiences.append(phrase[position + len(keyword):])
    return experiences
开发者ID:OmkarPathak,项目名称:ResumeParser,代码行数:34,代码来源:utils.py

示例15: __init__

# 需要导入模块: from nltk import stem [as 别名]
# 或者: from nltk.stem import WordNetLemmatizer [as 别名]
def __init__(self, stemmer='porter', tokenize=True, case_sensitive=False,
                 *args, **kwargs):
        """Resolve the stemmer and store the configuration.

        :param stemmer: either a key of ``self._stemmers`` (the mapped name
            is looked up on the ``stem`` module and instantiated with
            ``*args``/``**kwargs``), or an existing instance of
            ``stem.StemmerI`` or ``stem.WordNetLemmatizer``
        :param tokenize: stored as ``self.tokenize``
        :param case_sensitive: stored as ``self.case_sensitive``
        :raises ValueError: if ``stemmer`` is a string that is not a key of
            ``self._stemmers``, or a non-string that is not an accepted
            stemmer instance
        """
        if isinstance(stemmer, str):
            if stemmer not in self._stemmers:
                valid = list(self._stemmers)
                raise ValueError("Invalid stemmer '%s'; please use one of %s."
                                 % (stemmer, valid))
            # Map the short name to the class on the ``stem`` module.
            stemmer_factory = getattr(stem, self._stemmers[stemmer])
            stemmer = stemmer_factory(*args, **kwargs)
        elif not isinstance(stemmer, (stem.StemmerI, stem.WordNetLemmatizer)):
            raise ValueError("stemmer must be either a valid string, or an "
                             "instance of class StemmerI.")

        self.stemmer = stemmer
        self.tokenize = tokenize
        self.case_sensitive = case_sensitive
        super().__init__()
开发者ID:tyarkoni,项目名称:pliers,代码行数:17,代码来源:text.py


注:本文中的nltk.stem.WordNetLemmatizer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。