

Python nltk.WordNetLemmatizer Code Examples

This article collects typical usage examples of nltk.WordNetLemmatizer in Python. If you are wondering what nltk.WordNetLemmatizer does, how to call it, or what real-world usage looks like, the curated examples below may help. You can also explore further usage examples from the nltk package, where the method lives.


The following shows 13 code examples of nltk.WordNetLemmatizer, sorted by popularity by default.
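Before turning to the examples, here is a minimal standalone sketch (our illustration, not taken from any of the projects below). WordNetLemmatizer.lemmatize reduces an inflected word to its WordNet lemma; the optional pos argument ('n', 'v', 'a', 'r') sets the assumed part of speech, defaulting to noun:

import nltk

nltk.download('wordnet')  # the lemmatizer needs the WordNet corpus
lemmatizer = nltk.WordNetLemmatizer()
print(lemmatizer.lemmatize('cars'))          # -> 'car'
print(lemmatizer.lemmatize('running'))       # -> 'running' (treated as a noun)
print(lemmatizer.lemmatize('running', 'v'))  # -> 'run'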

Example 1: preprocess_sentence

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
import collections

import nltk

# Assumed module-level stop word list (left undefined in the original snippet):
stop_words = set(nltk.corpus.stopwords.words('english'))

def preprocess_sentence(sentence):
    lemmatizer = nltk.WordNetLemmatizer()
    # clearly list out our preprocessing pipeline
    processed_tokens = nltk.word_tokenize(sentence)
    processed_tokens = [w.lower() for w in processed_tokens]
    # find the least common tokens; most_common()[:-10:-1] yields the 9 rarest
    # (word, count) pairs, so keep just the words for the membership test below
    word_counts = collections.Counter(processed_tokens)
    uncommon_words = [w for w, _ in word_counts.most_common()[:-10:-1]]
    # remove stop words and rare tokens
    processed_tokens = [w for w in processed_tokens if w not in stop_words]
    processed_tokens = [w for w in processed_tokens if w not in uncommon_words]
    # lemmatize
    processed_tokens = [lemmatizer.lemmatize(w) for w in processed_tokens]
    return processed_tokens
Developer: PacktPublishing, Project: Hands-on-NLP-with-NLTK-and-scikit-learn-, Lines: 16, Source: nlp-2-spam-classification.py
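A hypothetical call of the function above, assuming the imports and the stop_words set sketched with it. Note that the rare-word filter drops the nine least common tokens, so on a single short sentence it may remove most of the text; it is intended for longer documents:

tokens = preprocess_sentence('Spam filters often lemmatize every token before training.')
print(tokens)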

Example 2: transform

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def transform(self, input_: list) -> list:
        """
        Lemmatize a sequence of tokens.

        :param input_: list of tokens to be lemmatized.

        :return tokens: list of lemmatized tokens.
        """
        lemmatizer = nltk.WordNetLemmatizer()
        return [lemmatizer.lemmatize(token, pos='v') for token in input_] 
Developer: NTMC-Community, Project: MatchZoo-py, Lines: 12, Source: lemmatization.py
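The transformer above passes pos='v' so that verb inflections collapse to their base form. A hypothetical standalone illustration of that choice:

import nltk

lemmatizer = nltk.WordNetLemmatizer()
print([lemmatizer.lemmatize(t, pos='v') for t in ['matching', 'ran', 'is']])
# -> ['match', 'run', 'be']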

Example 3: __init__

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, stopwords=None):
        self.stopwords  = set(stopwords or nltk.corpus.stopwords.words('english'))
        self.lemmatizer = nltk.WordNetLemmatizer() 
Developer: DistrictDataLabs, Project: partisan-discourse, Lines: 5, Source: learn.py

Example 4: lemmatize

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def lemmatize(tokens):
    """
    Lemmatize tokens, downloading the WordNet corpus on first use if needed.
    """
    wnl = nltk.WordNetLemmatizer()
    try:
        # The corpus loader is lazy, so a missing corpus raises LookupError
        # here on the first lemmatize() call, not when the class is constructed.
        return [wnl.lemmatize(t) for t in tokens]
    except LookupError:
        nltk.download('wordnet')
        return [wnl.lemmatize(t) for t in tokens] 
Developer: KevinLiao159, Project: Quora, Lines: 12, Source: nlp.py

Example 5: cond2

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def cond2(w):
    """Return True if the word's lemmatised form is not in the wordlist."""
    wnl = WordNetLemmatizer()
    return wnl.lemmatize(w.lower()) not in wordlist 
Developer: EFord36, Project: normalise, Lines: 6, Source: detect.py
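A hypothetical usage sketch: in the normalise project, wordlist is a module-level set of known words; a stand-in set is assumed here for illustration:

from nltk.stem import WordNetLemmatizer

wordlist = {'cat', 'dog', 'run'}  # assumed stand-in for the project's word list
wnl = WordNetLemmatizer()
print(wnl.lemmatize('cats') not in wordlist)    # -> False: 'cats' lemmatises to 'cat'
print(wnl.lemmatize('llamas') not in wordlist)  # -> True: 'llama' is not in the set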

Example 6: __init__

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self):
        self.install_nltk_corpora('stopwords', 'wordnet', 'punkt')
        self.lemmatizer = nltk.WordNetLemmatizer()
        self.lemmatizer.lemmatize('')  # Force nltk lazy corpus loader to do something.
        self.tokenizer = self.make_tokenizer()
        self.stopwords = nltk.corpus.stopwords.words('english')
        self.sent_tokenizer = None 
Developer: lgalke, Project: vec4ir, Lines: 9, Source: nltk_normalization.py

Example 7: __init__

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self):
        NltkNormalizer.install_nltk_corpora('averaged_perceptron_tagger')
        self.normalizer = NltkNormalizer()
        self.lem = nltk.WordNetLemmatizer()
        self.tagger = nltk.PerceptronTagger()
        # Map the first letter of a Penn Treebank tag to the WordNet POS constant
        self.translation_dict = {'J': wn.ADJ, 'N': wn.NOUN, 'R': wn.ADV, 'V': wn.VERB} 
Developer: quadflor, Project: Quadflor, Lines: 8, Source: synset_analysis.py

Example 8: lemmatize_term

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def lemmatize_term(term, pos=None):
    if pos is None:
        # guess the part of speech from the word's first WordNet synset
        synsets = wordnet.synsets(term)
        if not synsets:
            return term
        pos = synsets[0].pos()
        if pos == wordnet.ADJ_SAT:
            # treat satellite adjectives as plain adjectives for lemmatization
            pos = wordnet.ADJ
    return nltk.WordNetLemmatizer().lemmatize(term, pos=pos) 
Developer: Hironsan, Project: natural-language-preprocessings, Lines: 11, Source: normalization.py
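A hypothetical usage sketch, assuming the definition above together with its imports (import nltk and from nltk.corpus import wordnet):

print(lemmatize_term('cars'))       # first synset is a noun -> 'car'
print(lemmatize_term('ran', 'v'))   # explicit verb tag -> 'run'
print(lemmatize_term('qwerty123'))  # no synsets -> returned unchanged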

Example 9: add_from_lemma_definitions

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def add_from_lemma_definitions(self, vocab, try_lower=False):
        """Add lemma definitions for non-lemmas.

        This code covers the following scenario: suppose a dictionary has been
        crawled, but only for word lemmas.

        """
        lemmatizer = nltk.WordNetLemmatizer()
        added = 0
        for word in vocab.words:
            word_list = [word, word.lower()] if try_lower else [word]

            for word_to_lemma in word_list:
                try:
                    # WordNet POS tags: adjective, satellite adjective, adverb, noun, verb
                    for part_of_speech in ['a', 's', 'r', 'n', 'v']:
                        lemma = lemmatizer.lemmatize(word_to_lemma, part_of_speech)
                        lemma_defs = self._data.get(lemma)
                        if lemma != word and lemma_defs:
                            # This can be quite slow. But this code will not be used
                            # very often.
                            for def_ in lemma_defs:
                                if def_ not in self._data[word]:
                                    added += 1
                                    self._data[word].append(def_)
                except Exception:
                    logger.error("lemmatizer crashed on {}".format(word))
                    logger.error(traceback.format_exc())
        logger.info("Added {} new defs in add_from_lemma_definitions".format(added))
        self.save() 
Developer: tombosc, Project: cpae, Lines: 31, Source: retrieval.py

Example 10: crawl_lemmas

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def crawl_lemmas(self, vocab):
        """Add WordNet lemmas as definitions."""
        lemmatizer = nltk.WordNetLemmatizer()
        for word in vocab.words:
            definitions = []
            try:
                for part_of_speech in ['a', 's', 'r', 'n', 'v']:
                    lemma = lemmatizer.lemmatize(word, part_of_speech)
                    if lemma != word and [lemma] not in definitions:
                        definitions.append([lemma])
            except Exception:
                logger.error("lemmatizer crashed on {}".format(word))
            if definitions:
                self._data[word] = definitions
        self.save() 
Developer: tombosc, Project: cpae, Lines: 17, Source: retrieval.py

Example 11: Process

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def Process(data):
  lemmatizer = WordNetLemmatizer()
  # Note: the original snippet tokenized an undefined name `sentence`;
  # it should tokenize the `data` argument (Python 2 `unicode` built-in)
  return [lemmatizer.lemmatize(word.lower()) for word in word_tokenize(unicode(data, errors='ignore'))] 
Developer: PacktPublishing, Project: Mastering-Machine-Learning-for-Penetration-Testing, Lines: 5, Source: SpamDetection_NLTK.py
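The snippet above is Python 2 code (note the unicode built-in). A minimal Python 3 port, assuming data may arrive as str or bytes:

from nltk import WordNetLemmatizer, word_tokenize

def process(data):
    if isinstance(data, bytes):
        data = data.decode('utf-8', errors='ignore')
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(word.lower()) for word in word_tokenize(data)]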

Example 12: __init__

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, require_unique_match, lemmatizer="word_net",
                 empty_question_features=False, stop_words=None):
        self.lemmatizer = lemmatizer
        self.stop_words = stop_words
        self.empty_question_features = empty_question_features
        if lemmatizer == "word_net":
            self._lemmatizer = WordNetLemmatizer()
        else:
            raise ValueError()
        self._cache = {}
        self.require_unique_match = require_unique_match 
Developer: allenai, Project: document-qa, Lines: 13, Source: text_features.py

Example 13: __init__

# Required module: import nltk [as alias]
# Or: from nltk import WordNetLemmatizer [as alias]
def __init__(self, lower: bool = True, stemmer="port"):
        self.lower = lower
        self.stemmer = stemmer
        if stemmer == "port":
            self._stemmer = PorterStemmer()
            self._stem = self._stemmer.stem
        elif stemmer == "wordnet":
            self._stemmer = WordNetLemmatizer()
            self._stem = self._stemmer.lemmatize
        else:
            raise ValueError(stemmer)
        # stemming is slow, so we cache words as we go
        self.normalize_cache = {} 
Developer: allenai, Project: document-qa, Lines: 15, Source: text_utils.py
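The class above treats WordNetLemmatizer as a drop-in alternative to the Porter stemmer. A minimal comparison of the two backends (our illustration, not from the document-qa project):

from nltk.stem import PorterStemmer, WordNetLemmatizer

words = ['studies', 'running']
porter = PorterStemmer()
wnl = WordNetLemmatizer()
print([porter.stem(w) for w in words])    # -> ['studi', 'run']: crude suffix stripping
print([wnl.lemmatize(w) for w in words])  # -> ['study', 'running']: dictionary forms,
                                          #    but 'running' needs pos='v' to become 'run'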


Note: The nltk.WordNetLemmatizer examples on this page were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their original authors, who retain copyright of the source code; consult each project's license before distributing or reusing the code, and do not reproduce this compilation without permission.