This article collects typical usage examples of textblob.Word in Python. If you have been wondering what textblob.Word is for, how it is used, or what working calls look like, the curated code examples below should help. You can also explore further examples from the module it belongs to, textblob.
Seven code examples of textblob.Word are shown below, sorted by popularity by default. Please upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
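Before the collected examples, here is a minimal sketch of the three Word features these snippets rely on (lemmatization, WordNet synsets, and spellchecking). It assumes the NLTK corpora that TextBlob depends on (wordnet, etc.) have already been downloaded:

from textblob import Word

w = Word("octopi")
print(w.lemmatize())       # 'octopus' -- noun lemmatization is the default
print(w.synsets[:2])       # WordNet synsets, e.g. [Synset('octopus.n.01'), ...]
print(w.spellcheck()[:1])  # list of (suggestion, confidence) pairs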
Example 1: make_mine
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
import random

from textblob import Word

def make_mine(yours, swap_rate):
    """Rebuild a token list, replacing roughly one in `swap_rate` words
    with a randomly chosen WordNet synonym."""
    mine = []
    for string_word in yours:
        word_object = Word(string_word)
        if random.randint(0, swap_rate - 1) == 0:
            meaning_count = len(word_object.synsets)
            if meaning_count > 0:
                # pick a random sense, then a random lemma of that sense
                meaning_selected = random.randint(0, meaning_count - 1)
                lemmas = word_object.synsets[meaning_selected].lemmas()
                synonym_count = len(lemmas)
                mine += [lemmas[random.randint(0, synonym_count - 1)].name()]
            else:
                mine += [string_word]
        else:
            mine += [string_word]
    return ' '.join(mine)
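A quick usage sketch; the output varies from run to run because both the word sense and the synonym are chosen at random, and multi-word lemmas come back joined with underscores:

import random

random.seed(0)  # only to make this sketch repeatable
print(make_mine("the quick brown fox jumps".split(), swap_rate=2))
# e.g. 'the quick brown dodger jumps' -- about half the words get swapped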
Example 2: get_synsets
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
from textblob import Word

def get_synsets(text):
    # `to_lemma` is a project-local helper (defined elsewhere in the source
    # repository) that converts surface text into WordNet's lemma form.
    return Word(to_lemma(text)).synsets
Example 3: get_lemmas
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
import operator
from functools import reduce

from textblob import Word

def get_lemmas(text):
    # union the lemma names of every sense of the word; `to_lemma` and
    # `from_lemma` are project-local helpers defined elsewhere.
    word = Word(to_lemma(text))
    sets = map(set, [synset.lemma_names()
                     for synset in word.synsets])
    return map(from_lemma, reduce(operator.or_, sets))
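A usage sketch for the two helpers above, with hypothetical stand-ins for the project's to_lemma/from_lemma. WordNet lemma names join multi-word phrases with underscores, so a plausible pair simply swaps spaces and underscores:

to_lemma = lambda s: s.replace(' ', '_')    # hypothetical stand-in
from_lemma = lambda s: s.replace('_', ' ')  # hypothetical stand-in

print(get_synsets("dog")[:1])        # e.g. [Synset('dog.n.01')]
print(sorted(get_lemmas("dog"))[:3])
# e.g. ['Canis familiaris', 'andiron', 'blackguard'] -- lemmas from all senses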
Example 4: normalize
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
from textblob import Word

# WordNet part-of-speech tags, as expected by Word.lemmatize
NOUN, VERB, ADJ, ADV = "n", "v", "a", "r"

def normalize(self, word, tag="N"):
    """
    Normalizes word using the given Penn Treebank tag.
    If no tag is given, NOUN is assumed.
    """
    kind = NOUN
    if tag.startswith("V"):
        kind = VERB
    elif tag.startswith("RB"):
        kind = ADV
    elif tag.startswith("J"):
        kind = ADJ
    return Word(word).lemmatize(kind).lower()
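Since `self` is unused in the body, the method can be exercised directly; a few spot checks (assuming the WordNet corpus is installed):

print(normalize(None, "running", "VBG"))  # 'run'   (verb)
print(normalize(None, "cars", "NNS"))     # 'car'   (noun is the default)
print(normalize(None, "better", "JJR"))   # 'good'  (adjective lemmatization)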
Example 5: resolution
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
from textblob import Word

def resolution(surfaceName):
    '''
    input:   a surface name of an entity
    output:  the "normalized" entity name
    process: 1) lowercase
             2) lemmatization
    '''
    tmp = [Word(ele.lower()).lemmatize() for ele in surfaceName.split()]
    # lowercase-only variant:
    # tmp = [ele.lower() for ele in surfaceName.split()]
    return " ".join(tmp)
Example 6: iter_filth
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
import re

import nltk
import textblob

# method of a SkypeDetector class; `self.filth_cls`, `self.word_radius`
# and the SkypeDetector base class are defined elsewhere in the project
def iter_filth(self, text):
    # find 'skype' in the text using a customized tokenizer. This makes
    # sure that all valid skype usernames are kept as tokens and not split
    # into different words.
    tokenizer = nltk.tokenize.regexp.RegexpTokenizer(
        self.filth_cls.SKYPE_TOKEN
    )
    blob = textblob.TextBlob(text, tokenizer=tokenizer)
    skype_indices, tokens = [], []
    for i, token in enumerate(blob.tokens):
        tokens.append(token)
        if 'skype' in token.lower():
            skype_indices.append(i)

    # go through the words before and after 'skype' mentions to identify
    # potential skype usernames.
    skype_usernames = []
    for i in skype_indices:
        jmin = max(i - self.word_radius, 0)
        jmax = min(i + self.word_radius + 1, len(tokens))
        for j in list(range(jmin, i)) + list(range(i + 1, jmax)):
            token = tokens[j]
            if self.filth_cls.SKYPE_USERNAME.match(token):
                # this token is shaped like a valid skype username. Most
                # skype usernames appear to be misspelled words, and
                # Word.spellcheck does not handle all-caps words well, so
                # cast to lower case before checking whether the word is
                # misspelled.
                if token.isupper():
                    token = token.lower()
                word = textblob.Word(token)
                suggestions = word.spellcheck()
                corrected_word, score = suggestions[0]
                if score < 0.5:
                    # low spellcheck confidence -> probably not a real
                    # word, so treat it as a username
                    skype_usernames.append(token)

    # replace all identified skype usernames
    if skype_usernames:
        self.filth_cls.regex = re.compile('|'.join(skype_usernames))
    else:
        self.filth_cls.regex = None
    return super(SkypeDetector, self).iter_filth(text)
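The spellcheck heuristic in the middle of the method can be isolated and tested on its own; a minimal sketch using the same 0.5 threshold (looks_like_username is a hypothetical helper, not part of the detector):

import textblob

def looks_like_username(token, threshold=0.5):
    # a token whose best spelling suggestion carries low confidence is
    # probably not a dictionary word, hence a plausible username
    if token.isupper():
        token = token.lower()
    _suggestion, score = textblob.Word(token).spellcheck()[0]
    return score < threshold

print(looks_like_username("hello"))       # False: an ordinary word
print(looks_like_username("jsmith1987"))  # likely True: no confident correction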
Example 7: keywords
# Required module import: import textblob [as alias]
# or: from textblob import Word [as alias]
from nltk.corpus import stopwords as sw
from textblob import Word, WordList

def keywords(self, keywords):
    if keywords is None:
        raise ValueError('no input keywords supplied')
    if not isinstance(keywords, (list, WordList)):
        raise TypeError('supplied keyword object is neither a list '
                        'nor a textblob.WordList')
    if isinstance(keywords, list):
        keywords = [Word(word.lower()) for word in keywords]
    # normalize case
    words = [word.lower() for word in keywords]
    # remove all stopwords
    stopwords = sw.words("english")
    words = [word for word in words if word not in stopwords]
    nwords = []
    for word in words:
        # `keyword_filterlist` is a module-level list defined elsewhere
        # in the source project
        if word in keyword_filterlist:
            continue
        for term in self.global_filterlist:
            if word in term:
                break  # word matches the global filter list: drop it
        else:
            nwords.append(word)
    # textblob breaks possessives and other contractions into two
    # distinct words, but sometimes leaves a trailing unicode
    # apostrophe - if so, strip it
    words = [word.strip(u'\u2019') for word in nwords]
    return words
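A usage sketch with hypothetical stand-ins for the project's real filter lists (it also requires the NLTK stopwords corpus to be downloaded):

import types

keyword_filterlist = []  # hypothetical stand-in for the module-level list
obj = types.SimpleNamespace(global_filterlist=['https', 'http'])

print(keywords(obj, ['The', 'HTTPS', 'server', 'and', 'the', 'cats']))
# -> ['server', 'cats']: stopwords are dropped, and 'https' is suppressed
#    because it matches a term in the global filter list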