

Python wordnet.WordNetLemmatizer Code Examples

This article collects typical usage examples of nltk.stem.wordnet.WordNetLemmatizer in Python. If you are wondering how to use wordnet.WordNetLemmatizer, what it is good for, or want to see it used in real code, the curated examples below may help. You can also browse further usage examples from the enclosing module, nltk.stem.wordnet.


The following presents 15 code examples of wordnet.WordNetLemmatizer, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
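
Before the project examples, here is a minimal, self-contained usage sketch (it is not taken from any of the projects below): lemmatize() treats its input as a noun by default, and passing a WordNet part-of-speech tag such as 'v' changes the result. It assumes the WordNet corpus has been downloaded via nltk.download.

import nltk
from nltk.stem.wordnet import WordNetLemmatizer

nltk.download('wordnet')  # one-time download of the WordNet data

lemmatizer = WordNetLemmatizer()
print(lemmatizer.lemmatize('apples'))        # 'apple'   (default POS is noun)
print(lemmatizer.lemmatize('running'))       # 'running' (unchanged when treated as a noun)
print(lemmatizer.lemmatize('running', 'v'))  # 'run'     (lemmatized as a verb)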

Example 1: is_atomic_mwe

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def is_atomic_mwe(mwe, verb_lemma, complement_lemma, synsets):
    # Note: this snippet uses the pre-NLTK-3 Synset API, where definition, lemmas and name
    # are attributes; in NLTK 3+ they are methods (definition(), lemmas(), name()).
    mwe_count = 0
    for synset in synsets:
        gloss_lemmas = set([WordNetLemmatizer().lemmatize(word) for word in synset.definition.split()])
        if verb_lemma in gloss_lemmas or complement_lemma in gloss_lemmas:
            return False
        for syn_lemma in synset.lemmas:
            if syn_lemma.name != mwe:
                tokens = syn_lemma.name.split('_')
                for token in tokens:
                    if token == verb_lemma:
                        return False
                if len(tokens) == 2 and tokens[1] == complement_lemma:
                    return False
            else:
                mwe_count += syn_lemma.count()
    return True 
Developer ID: orenmel, Project: lexsub, Lines of code: 19, Source file: preprocess_lst_test.py

Example 2: ulterior_clean

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def ulterior_clean(tweets, filename):
    if not os.path.exists(filename):
        stopwords = get_stopwords_list()
        lemmatizer = WordNetLemmatizer()
        filtered_tweets = []
        for tw in tweets:
            filtered_tweet = []
            for t in tw.split():
                token = t.lower()
                if token in stopwords:
                    continue
                filtered_token = lemmatizer.lemmatize(token, 'v')
                filtered_token = lemmatizer.lemmatize(filtered_token)
                filtered_tweet.append(filtered_token)
            filtered_tweets.append(' '.join(filtered_tweet))
        utils.save_file(filtered_tweets, filename)
    # Load the filtered tokens
    filtered_tweets = utils.load_file(filename)
    return filtered_tweets 
Developer ID: MirunaPislar, Project: Sarcasm-Detection, Lines of code: 21, Source file: data_processing.py

Example 3: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, save_dir, config_path='./miner.yaml'):
        super(ImageNetMiner, self).__init__(save_dir, config_path)
        self.__engine__ = 'imagenet'
        self.format_url = 'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'

        # maximum number of synsets to retrieve - we don't need all images necessarily, otherwise
        # we get enormous amounts of synsets for words like 'entity' or 'animal'
        self.max_synsets = 10000

        self.wnl = WordNetLemmatizer()

        # url cache
        self.imgnet_url_cache = {}

        # whether we "level up" in hierarchy if no images found
        self.level_up_if_no_images = True 
Developer ID: douwekiela, Project: mmfeat, Lines of code: 18, Source file: imagenet.py

Example 4: convert_to_vw

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def convert_to_vw(text):
    tokenizer = nltk.RegexpTokenizer(r'\w+')
    lmtzr = WordNetLemmatizer()
    tokens = [t.lower() for t in tokenizer.tokenize(text)]
    id_ = 13371337
    processed = []
    for t in tokens:
        l = lmtzr.lemmatize(t)
        processed.append(l)
    counted = Counter(processed)
    res_str = str(id_)
    for k, v in counted.items():
        if v != 1:
            res_str = res_str + " {}:{}".format(k, v)
        else:
            res_str = res_str + " {}".format(k)
    return res_str 
Developer ID: sld, Project: convai-bot-1337, Lines of code: 19, Source file: tokenizing.py
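
A quick usage sketch for convert_to_vw (my own illustration, not from the project): each lemmatized token is counted and emitted in Vowpal Wabbit-style "token:count" form, with the count omitted when it equals 1. The exact token order depends on the Python version's dict/Counter ordering.

text = "The cat saw the other cat"
print(convert_to_vw(text))
# expected (token order may vary): 13371337 the:2 cat:2 saw other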

Example 5: extract_lemmatized_nouns

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def extract_lemmatized_nouns(self, new_review):
        stopwords = self.load_stopwords()
        words = []

        sentences = nltk.sent_tokenize(new_review.lower())
        for sentence in sentences:
            tokens = nltk.word_tokenize(sentence)
            text = [word for word in tokens if word not in stopwords]
            tagged_text = nltk.pos_tag(text)

            for word, tag in tagged_text:
                words.append({"word": word, "pos": tag})

        lem = WordNetLemmatizer()
        nouns = []
        for word in words:
            if word["pos"] in ["NN", "NNS"]:
                nouns.append(lem.lemmatize(word["word"]))

        return nouns 
Developer ID: vladsandulescu, Project: topics, Lines of code: 22, Source file: predict.py

Example 6: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, stoplist=None, punct=None, lemmatizer=None):
        # Load stopwords, punctuation, and lemmatizer
        # This takes a bit of work, so we only want to do it once!
        self.stopwords   = stoplist or stopwords.words('english')
        self.punctuation = punct or string.punctuation
        self.lemmatizer  = lemmatizer or WordNetLemmatizer() 
Developer ID: georgetown-analytics, Project: product-classifier, Lines of code: 8, Source file: features.py

Example 7: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self):
        self.lmtzr = WordNetLemmatizer()
        self.aux_verbs = ['be'] 
Developer ID: davidsbatista, Project: Snowball, Lines of code: 5, Source file: ReVerb.py

Example 8: getLemma

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def getLemma(text, contextFlag=False):
	lemmatizer = WordNetLemmatizer()
	# 'NN':wordnet.NOUN, 'JJ':wordnet.ADJ, 'VB':wordnet.VERB, 'RB':wordnet.ADV
	wordnet_tag = {'NN':'n', 'JJ':'a', 'VB':'v', 'RB':'r'}
	result = None
	if len(text.split()) == 1: # one word
		tokenized = word_tokenize(text)
		tagged = pos_tag(tokenized)[0]
		lemma = ''
		try: lemma = lemmatizer.lemmatize(tagged[0], wordnet_tag[tagged[1][:2]])
		except: lemma = lemmatizer.lemmatize(tagged[0])
		result = lemma
	elif len(text.split()) > 1 and contextFlag == False: # multiple words, lemmatize each word without considering its context
		resultList = []
		for t in text.split():
			tokenized = word_tokenize(t)
			tagged = pos_tag(tokenized)[0]
			lemma = ''
			try: lemma = lemmatizer.lemmatize(tagged[0], wordnet_tag[tagged[1][:2]])
			except: lemma = lemmatizer.lemmatize(tagged[0])
			resultList.append(lemma)
		result = ' '.join(resultList)
	else: # multiple words, POS-tag the whole text so the context is taken into account
		resultList = []
		tokens = word_tokenize(text)
		tagged = pos_tag(tokens)
		for t in tagged:
			try: resultList.append(lemmatizer.lemmatize(t[0], wordnet_tag[t[1][:2]]))
			except: resultList.append(lemmatizer.lemmatize(t[0]))
		result = ' '.join(resultList)
	return result
###################################################################################

# Given a Naive Bayes classifier, classify a text with a given certainty 
Developer ID: motazsaad, Project: comparable-text-miner, Lines of code: 36, Source file: textpro.py
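
A brief usage sketch for getLemma as corrected above (my own illustration, assuming the required NLTK tokenizer and tagger models are available); the exact results depend on NLTK's default POS tagger, so the values shown are only the expected ones:

print(getLemma('running'))                # expected: 'run'
print(getLemma('the dogs were running'))  # per-word lemmas, expected: 'the dog be run'
print(getLemma('the dogs were running', contextFlag=True))  # POS-tags the whole sentence before lemmatizing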

Example 9: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self):
        self.stopwords = set(stopwords.words("english"))
        self.lemmatizer = WordNetLemmatizer() 
Developer ID: DongjunLee, Project: quantified-self, Lines of code: 5, Source file: disintegrator.py

Example 10: verbs2basicform

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def verbs2basicform(words):
    # 'wn' is assumed to be the WordNet corpus reader (nltk.corpus.wordnet) imported in the source module
    ret = []
    for w in words:
        analysis = wn.synsets(w)
        if any([a.pos() == 'v' for a in analysis]):
            w = WordNetLemmatizer().lemmatize(w, 'v')
        ret.append(w)
    return ret 
Developer ID: bbrattoli, Project: ZeroShotVideoClassification, Lines of code: 10, Source file: auxiliary_word2vec.py
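
A short usage sketch for verbs2basicform (my own illustration): only words that have at least one verb sense in WordNet are lemmatized as verbs; everything else passes through unchanged.

print(verbs2basicform(['went', 'said', 'table']))
# expected: ['go', 'say', 'table']  ('table' has a verb sense but is already in base form)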

Example 11: parseSentence

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def parseSentence(line):
    lmtzr = WordNetLemmatizer()
    stop = stopwords.words('english')
    text_token = CountVectorizer().build_tokenizer()(line.lower())
    text_rmstop = [i for i in text_token if i not in stop]
    text_stem = [lmtzr.lemmatize(w) for w in text_rmstop]
    return text_stem 
Developer ID: madrugado, Project: Attention-Based-Aspect-Extraction, Lines of code: 9, Source file: preprocess.py
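
A usage sketch for parseSentence (my own illustration): the sentence is lowercased and tokenized with scikit-learn's default tokenizer (tokens of two or more characters), English stopwords are dropped, and the remaining tokens are lemmatized as nouns.

print(parseSentence('The movies were surprisingly good'))
# expected: ['movie', 'surprisingly', 'good']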

Example 12: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english'):
        self.stopwords  = frozenset(nltk.corpus.stopwords.words(language))
        self.lemmatizer = WordNetLemmatizer() 
Developer ID: foxbook, Project: atap, Lines of code: 5, Source file: normalize.py

Example 13: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english'):
        self.stopwords  = set(nltk.corpus.stopwords.words(language))
        self.lemmatizer = WordNetLemmatizer() 
Developer ID: foxbook, Project: atap, Lines of code: 5, Source file: transformers.py

Example 14: __init__

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def __init__(self, language='english', minimum=2, maximum=200):
        self.min = minimum
        self.max = maximum
        self.stopwords  = set(nltk.corpus.stopwords.words(language))
        self.lemmatizer = WordNetLemmatizer() 
Developer ID: foxbook, Project: atap, Lines of code: 7, Source file: transformer.py

Example 15: unify_word

# Required module import: from nltk.stem import wordnet [as alias]
# Or: from nltk.stem.wordnet import WordNetLemmatizer [as alias]
def unify_word(word):  # went -> go, apples -> apple, BIG -> big
    """unify verb tense and noun singular"""
    ADJ, ADJ_SAT, ADV, NOUN, VERB = 'a', 's', 'r', 'n', 'v'
    for wt in [ADJ, ADJ_SAT, ADV, NOUN, VERB]:
        try:
            word = WordNetLemmatizer().lemmatize(word, pos=wt)
        except:
            pass
    return word.lower() 
Developer ID: WayneDW, Project: Sentiment-Analysis-in-Event-Driven-Stock-Price-Movement-Prediction, Lines of code: 11, Source file: util.py
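
A short usage note for unify_word (my own illustration): every WordNet POS is tried in turn, so irregular verb forms and plural nouns both collapse to their base form, and the result is lowercased at the end.

print(unify_word('went'))    # expected: 'go'
print(unify_word('apples'))  # expected: 'apple'
print(unify_word('BIG'))     # expected: 'big'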


Note: the nltk.stem.wordnet.WordNetLemmatizer examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and the source code copyright remains with the original authors; please consult each project's license before distributing or using the code. Do not republish without permission.