

Python nltk.WordNetLemmatizer Code Examples

This article collects typical usage examples of the nltk.WordNetLemmatizer method in Python. If you are wondering what nltk.WordNetLemmatizer does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples from the nltk package.


The 13 code examples of the nltk.WordNetLemmatizer method shown below are sorted by popularity by default.
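
Before the examples, a minimal usage sketch of the API they all share: nltk.WordNetLemmatizer is a thin wrapper around WordNet's morphy lookup, and its lemmatize(word, pos='n') method treats every word as a noun unless told otherwise (the download is only needed once per machine):

import nltk

nltk.download('wordnet')  # fetch the WordNet data once before first use

lemmatizer = nltk.WordNetLemmatizer()
print(lemmatizer.lemmatize('corpora'))        # 'corpus'
print(lemmatizer.lemmatize('running'))        # 'running' (treated as a noun by default)
print(lemmatizer.lemmatize('running', 'v'))   # 'run' (with an explicit verb POS)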

Example 1: preprocess_sentence

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def preprocess_sentence(sentence):
    # `collections` is imported and `stop_words` is defined elsewhere in the
    # source file.
    lemmatizer = nltk.WordNetLemmatizer()
    # clearly list out our preprocessing pipeline
    processed_tokens = nltk.word_tokenize(sentence)
    processed_tokens = [w.lower() for w in processed_tokens]
    # find the least common tokens; most_common() yields (word, count) pairs,
    # so keep only the words for the membership test below
    word_counts = collections.Counter(processed_tokens)
    uncommon_words = [w for w, _ in word_counts.most_common()[:-10:-1]]
    # remove stopwords and the uncommon tokens
    processed_tokens = [w for w in processed_tokens if w not in stop_words]
    processed_tokens = [w for w in processed_tokens if w not in uncommon_words]
    # lemmatize
    processed_tokens = [lemmatizer.lemmatize(w) for w in processed_tokens]
    return processed_tokens
Author: PacktPublishing, Project: Hands-on-NLP-with-NLTK-and-scikit-learn-, Lines: 16, Source: nlp-2-spam-classification.py

Example 2: transform

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def transform(self, input_: list) -> list:
    """
    Lemmatize a sequence of tokens.

    :param input_: list of tokens to be lemmatized.

    :return tokens: list of lemmatized tokens.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    return [lemmatizer.lemmatize(token, pos='v') for token in input_]
Author: NTMC-Community, Project: MatchZoo-py, Lines: 12, Source: lemmatization.py
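
The pos='v' argument above is doing real work: with the default noun POS, verb inflections pass through unchanged. A quick illustration (ours, not from the MatchZoo-py project):

import nltk

lemmatizer = nltk.WordNetLemmatizer()
tokens = ['flying', 'studies', 'went']
print([lemmatizer.lemmatize(t) for t in tokens])           # ['flying', 'study', 'went']
print([lemmatizer.lemmatize(t, pos='v') for t in tokens])  # ['fly', 'study', 'go']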

Example 3: __init__

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, stopwords=None):
    self.stopwords  = set(stopwords or nltk.corpus.stopwords.words('english'))
    self.lemmatizer = nltk.WordNetLemmatizer()
Author: DistrictDataLabs, Project: partisan-discourse, Lines: 5, Source: learn.py

Example 4: lemmatize

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def lemmatize(tokens):
    """
    Lemmatize tokens.
    """
    wnl = nltk.WordNetLemmatizer()
    try:
        # WordNet loads lazily: a missing corpus raises LookupError on the
        # first lemmatize() call, not when the lemmatizer is constructed.
        return [wnl.lemmatize(t) for t in tokens]
    except LookupError:
        nltk.download('wordnet')
        return [wnl.lemmatize(t) for t in tokens]
Author: KevinLiao159, Project: Quora, Lines: 12, Source: nlp.py

Example 5: cond2

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def cond2(w):
    """Return True if the word's lemmatised form is not in the wordlist."""
    # `wordlist` is a module-level word list in the source project.
    wnl = WordNetLemmatizer()
    return wnl.lemmatize(w.lower()) not in wordlist
Author: EFord36, Project: normalise, Lines: 6, Source: detect.py

Example 6: __init__

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self):
    self.install_nltk_corpora('stopwords', 'wordnet', 'punkt')
    self.lemmatizer = nltk.WordNetLemmatizer()
    self.lemmatizer.lemmatize('')  # Force nltk lazy corpus loader to do something.
    self.tokenizer = self.make_tokenizer()
    self.stopwords = nltk.corpus.stopwords.words('english')
    self.sent_tokenizer = None
Author: lgalke, Project: vec4ir, Lines: 9, Source: nltk_normalization.py

Example 7: __init__

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self):
    NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
    self.normalizer = NltkNormalizer()
    self.lem = nltk.WordNetLemmatizer()
    self.tagger = nltk.PerceptronTagger()
    # Map the first letter of a Penn Treebank tag to a WordNet POS constant.
    self.translation_dict = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}
Author: quadflor, Project: Quadflor, Lines: 8, Source: synset_analysis.py
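
The translation_dict above encodes the usual recipe: take the first letter of a Penn Treebank tag from nltk.pos_tag and map it to a WordNet POS constant before lemmatizing. A self-contained sketch of that recipe (the helper name lemmatize_tagged is ours, not from the Quadflor project):

import nltk
from nltk.corpus import wordnet as wn

TAG_TO_POS = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB}

def lemmatize_tagged(tokens):
    """Lemmatize tokens using their POS tags; unknown tags fall back to noun."""
    lemmatizer = nltk.WordNetLemmatizer()
    tagged = nltk.pos_tag(tokens)  # needs the 'averaged_perceptron_tagger' data
    return [lemmatizer.lemmatize(word, TAG_TO_POS.get(tag[0], wn.NOUN))
            for word, tag in tagged]

print(lemmatize_tagged(['the', 'bats', 'were', 'hanging', 'quietly']))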

Example 8: lemmatize_term

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def lemmatize_term(term, pos=None):
    if pos is None:
        # Guess the POS from the word's most frequent WordNet sense.
        synsets = wordnet.synsets(term)
        if not synsets:
            return term
        pos = synsets[0].pos()
        if pos == wordnet.ADJ_SAT:
            # Satellite adjectives ('s') are lemmatized as plain adjectives.
            pos = wordnet.ADJ
    return nltk.WordNetLemmatizer().lemmatize(term, pos=pos)
Author: Hironsan, Project: natural-language-preprocessings, Lines: 11, Source: normalization.py
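
Note that the heuristic above uses the POS of the word's first (most frequent) synset, so the guess can disagree with an explicit tag. A usage sketch, assuming lemmatize_term from Example 8 is in scope:

print(lemmatize_term('better'))       # POS comes from wordnet.synsets('better')[0]
print(lemmatize_term('better', 'a'))  # 'good': with the adjective POS, the exception list applies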

Example 9: add_from_lemma_definitions

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def add_from_lemma_definitions(self, vocab, try_lower=False):
    """Add lemma definitions for non-lemmas.

    This code covers the following scenario: suppose a dictionary was
    crawled, but only for word lemmas.

    """
    lemmatizer = nltk.WordNetLemmatizer()
    added = 0
    for word in vocab.words:
        word_list = [word, word.lower()] if try_lower else [word]

        for word_to_lemma in word_list:
            try:
                # 'a', 's', 'r', 'n', 'v' are the five WordNet POS codes.
                for part_of_speech in ['a', 's', 'r', 'n', 'v']:
                    lemma = lemmatizer.lemmatize(word_to_lemma, part_of_speech)
                    lemma_defs = self._data.get(lemma)
                    if lemma != word and lemma_defs:
                        # This can be quite slow. But this code will not be used
                        # very often.
                        for def_ in lemma_defs:
                            if def_ not in self._data[word]:
                                added += 1
                                self._data[word].append(def_)
            except Exception:
                logger.error("lemmatizer crashed on {}".format(word))
                logger.error(traceback.format_exc())
    logger.info("Added {} new defs in add_from_lemma_definitions".format(added))
    self.save()
Author: tombosc, Project: cpae, Lines: 31, Source: retrieval.py
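
The ['a', 's', 'r', 'n', 'v'] loop here (and in Example 10 below) enumerates all five WordNet POS codes. For reference, they correspond to the wordnet module's constants (a quick check, not part of the project):

from nltk.corpus import wordnet as wn

print(wn.ADJ, wn.ADJ_SAT, wn.ADV, wn.NOUN, wn.VERB)  # a s r n v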

Example 10: crawl_lemmas

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def crawl_lemmas(self, vocab):
    """Add WordNet lemmas as definitions."""
    lemmatizer = nltk.WordNetLemmatizer()
    for word in vocab.words:
        definitions = []
        try:
            for part_of_speech in ['a', 's', 'r', 'n', 'v']:
                lemma = lemmatizer.lemmatize(word, part_of_speech)
                if lemma != word and [lemma] not in definitions:
                    definitions.append([lemma])
        except Exception:
            logger.error("lemmatizer crashed on {}".format(word))
        if definitions:
            self._data[word] = definitions
    self.save()
Author: tombosc, Project: cpae, Lines: 17, Source: retrieval.py

Example 11: Process

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def Process(data):
    # Python 2 code: `unicode` does not exist in Python 3. The original
    # referenced an undefined `sentence`; the parameter is `data`.
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word.lower())
            for word in word_tokenize(unicode(data, errors='ignore'))]
Author: PacktPublishing, Project: Mastering-Machine-Learning-for-Penetration-Testing, Lines: 5, Source: SpamDetection_NLTK.py

Example 12: __init__

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, require_unique_match, lemmatizer="word_net",
             empty_question_features=False, stop_words=None):
    self.lemmatizer = lemmatizer
    self.stop_words = stop_words
    self.empty_question_features = empty_question_features
    if lemmatizer == "word_net":
        self._lemmatizer = WordNetLemmatizer()
    else:
        raise ValueError(lemmatizer)
    self._cache = {}
    self.require_unique_match = require_unique_match
Author: allenai, Project: document-qa, Lines: 13, Source: text_features.py
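
Examples 12 and 13 both keep a cache because lemmatize() repeats WordNet lookups for every call. A minimal memoized wrapper in the same spirit (our own sketch; functools.lru_cache is one idiomatic way to get the cache for free):

import functools
import nltk

_lemmatizer = nltk.WordNetLemmatizer()

@functools.lru_cache(maxsize=100000)
def cached_lemmatize(word, pos='n'):
    # Identical (word, pos) pairs hit the cache instead of WordNet.
    return _lemmatizer.lemmatize(word, pos)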

Example 13: __init__

# Required import: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, lower: bool = True, stemmer="port"):
    self.lower = lower
    self.stemmer = stemmer
    if stemmer == "port":
        self._stemmer = PorterStemmer()
        self._stem = self._stemmer.stem
    elif stemmer == "wordnet":
        self._stemmer = WordNetLemmatizer()
        self._stem = self._stemmer.lemmatize
    else:
        raise ValueError(stemmer)
    # stemming is slow, so we cache words as we go
    self.normalize_cache = {}
Author: allenai, Project: document-qa, Lines: 15, Source: text_utils.py
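
Example 13 treats the lemmatizer as a drop-in alternative to a stemmer, but the two behave quite differently: Porter stemming clips suffixes by rule and may produce non-words, while WordNet lemmatization maps inflected forms to dictionary lemmas (and needs a POS hint to handle verbs). A small comparison:

from nltk.stem import PorterStemmer, WordNetLemmatizer

stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()
for word in ['studies', 'corpora', 'running']:
    print(word, stemmer.stem(word), lemmatizer.lemmatize(word))
# studies studi study
# corpora corpora corpus
# running run running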


Note: the nltk.WordNetLemmatizer examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects by their original contributors; copyright remains with the original authors, and distribution or use should follow each project's License. Do not republish without permission.