本文整理汇总了Python中textblob.TextBlob.lower方法的典型用法代码示例。如果您正苦于以下问题:Python TextBlob.lower方法的具体用法?Python TextBlob.lower怎么用?Python TextBlob.lower使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类textblob.TextBlob
的用法示例。
在下文中一共展示了TextBlob.lower方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def extract(ngrams, dataset, doc_id):
# extract keywords
print 'Extracting keywords'
for i, ngram in enumerate(ngrams):
doc = doc_id[i]
if field not in dataset[doc]:
dataset[doc][field] = set()
if doc > 0 and doc % 1000 == 0:
print '\t', doc
for kw in filter(lambda k: '_' in k, ngram):
keyword = kw.replace('_', ' ')
kw_tb = TextBlob(keyword)
# filter out punctuation, etc (make sure that there are two non-punc words)
if len(kw_tb.words) < 2:
continue
# add keywords which are all proper nouns
distinct_tags = set(t[1] for t in kw_tb.tags)
if distinct_tags - {'NNP', 'NNPS'} == {}:
dataset[doc][field].add(kw_tb.lower())
continue
# add noun phrases
for np in kw_tb.lower().noun_phrases:
dataset[doc][field].add(np)
return kw_set_to_list(dataset)
示例2: __init__
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
class SexxiBot:
    """ Main ChatBot class to take in user input and return an appropriate response.
    Contains methods: fix_typos, to correct any user's typos;
    help_check, to check if the user has asked for 'help' (a list of possible commands);
    check_phrase_similarity, to compare user inputs to keywords to generate basic responses;
    create_response, to generate a new response based on the users input.

    NOTE(review): relies on module-level names not shown in this snippet:
    slang_typo_dict, PHRASE_TYPES, keywords, responses, unimportant_words,
    liquidmetal, random, check_pos_tags, format_response -- confirm imports.
    """
    def __init__(self):
        # raw (later normalised) message text
        self.user_input = str()
        # number of (word, tag) pairs in the tagged input; set in check_phrase_similarity
        self.input_len = int()
        # the reply to send back to the user
        self.response = str()

    def fix_typos(self):
        # POS-tag the lower-cased input; .tags is a list of (word, tag) tuples
        self.user_input = TextBlob(self.user_input.lower()).tags
        # Fix lazy user typos, or slang
        words = list()
        for i in self.user_input:
            words.append(i[0])
        for part in range(len(words)):
            if words[part] in slang_typo_dict.keys():
                words[part] = slang_typo_dict[words[part]]
        # re-join the (possibly corrected) words into a plain string
        self.user_input = ' '.join(words)
        return False  # Returns false to move on to help_check

    def help_check(self):
        # exact match on "help" (input was normalised by fix_typos)
        if self.user_input.lower() == "help":
            self.response = responses.HELP
            return True
        return False  # User didn't ask for help, move on to check_phrase_similarity

    def check_phrase_similarity(self):
        # re-tag the input: each element of user_input is a (word, tag) tuple
        self.user_input = TextBlob(self.user_input.lower()).tags
        self.input_len = len(self.user_input)
        for phrase_type in PHRASE_TYPES:
            for phrase in getattr(keywords, phrase_type):
                score = float()
                for word in self.user_input:
                    for n in phrase:
                        # NOTE(review): parses as `word and (n not in unimportant_words)`;
                        # `word` is a tuple and always truthy, so only the second
                        # test matters -- possibly `word[0] not in unimportant_words`
                        # was intended; confirm.
                        if word and n not in unimportant_words:
                            # average fuzzy-match score over all input words
                            score += liquidmetal.score(n, word[0]) / self.input_len
                if score >= 0.7:  # Could be increased/ decreased through testing to find more optimal value
                    self.response = random.choice(getattr(responses, phrase_type))
                    return True
        return False

    def create_response(self):  # NOT WORKING YET!
        # Craft a response based on user's message
        noun, pronoun, verb, adj, prep, text_len = check_pos_tags.pos_tags(self.user_input)
        self.response = format_response.craft_response(noun, pronoun, verb, adj, prep, text_len)
        print self.response
        return False if self.response == ' ' else True
示例3: fit
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def fit(self, X, y=None):
    """Learn the vocabulary from the lemmatized, lower-cased words of X.

    When ``self.num_words`` is set, keep only that many most frequent
    lemmas; otherwise keep every distinct lemma.  Returns self.
    """
    lemmas = []
    for document in X:
        blob = TextBlob(document.lower())
        lemmas.extend(token.lemmatize() for token in blob.words)
    if self.num_words:
        counts = Counter(lemmas)
        self._vocab = [lemma for lemma, _ in counts.most_common(self.num_words)]
    else:
        self._vocab = list(set(lemmas))
    return self
示例4: transform
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def transform(self, X):
    """Vectorize each document in X as term counts over ``self._vocab``.

    Returns a list of count vectors (one per document), aligned with the
    vocabulary learned in ``fit``; out-of-vocabulary words are ignored.
    """
    # Hoist an O(1) word -> index map out of the loop; the original used
    # list.index(), an O(len(vocab)) scan per distinct word per document.
    index_of = {word: i for i, word in enumerate(self._vocab)}
    vectors = []
    for x in X:
        blob = TextBlob(x.lower())
        word_count = Counter(blob.words)
        vector = [0] * len(self._vocab)
        for word, count in word_count.items():
            idx = index_of.get(word)
            if idx is not None:
                vector[idx] = count
        vectors.append(vector)
    return vectors
示例5: textblob_ngrams
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def textblob_ngrams(sentence, n=3, remove_stopwords=False, all_lower_case=False):
    ''' Takes in a sentence returns the word n-grams of that sentence
    as boolean features.
    @Arguments:
        sentence -- Chosen sentence to tokenize, type(sentence) = String
        n (optional) -- length of each n-gram (default 3)
        remove_stopwords (optional) -- if true, all stopwords in sentence
            will not be included as features. Currently only for English
            text. Value is initially false
        all_lower_case (optional) -- if true, the sentence is lower-cased
            before feature extraction
    @Return:
        Dict of features of the following form:
            {ngram_1: True, ngram_2: True, ... }
        where each key is a tuple of n words.

    NOTE(review): `stopwords` is not defined in this snippet -- presumably
    nltk.corpus.stopwords.words('english') loaded at module level; confirm.
    '''
    sentence = TextBlob(sentence)
    features = dict()
    clean_string = ''
    # Changes all word features to lower case if true
    if all_lower_case:
        sentence = sentence.lower()
    # Removes stopwords
    for word in sentence.words:
        # Removes word from features if in nlkt.corpus.stopwords('english')
        if remove_stopwords:
            if word.string in stopwords:
                continue
        clean_string += ''.join([word, ' '])
    # BUG FIX: the original iterated `TextBlob(clean_string)` directly,
    # which walks the blob character by character and ignores `n` entirely.
    # Use TextBlob.ngrams(n) to get actual word n-grams; each WordList is
    # converted to a tuple so it can serve as a dict key.
    for ngram in TextBlob(clean_string).ngrams(n):
        features[tuple(ngram)] = True
    return features
示例6: hello_monkey
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def hello_monkey():
    """Respond to incoming calls with a simple text message."""
    # NOTE(review): `twiml`, `request`, `NLPStuff`, `storeCookies`,
    # `salutationToCaller` and `setMessage` come from Flask/Twilio code
    # outside this snippet; confirm their contracts against the caller.
    resp = twiml.Response()
    message=""
    name=""
    # Twilio webhook parameters: caller number, callee number, SMS text
    fromNumber = request.values.get('From',None)
    myNumber = request.values.get('To',None)
    body = request.values.get('Body')
    # Python 2: drop any non-ASCII bytes before NLP processing
    body = body.decode("ascii", errors="ignore")
    blob = TextBlob(body)
    NLPObject = NLPStuff(resp, blob, message)
    counter = storeCookies(blob)
    message+= salutationToCaller(message, fromNumber, myNumber, counter)
    # keyword trigger: any message containing "help" replaces the reply
    if "help" in blob.lower():
        message="This is an information HELP message please tell me what to do"
    return setMessage(message, name, myNumber, counter, body, blob, resp)
示例7: feature_extractor
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def feature_extractor(text):
    """Extract boolean keyword features from *text* for classification.

    Plain strings are lower-cased and wrapped in a TextBlob; a TextBlob
    argument is used as-is.  Each ``has_*`` feature tests whether the
    corresponding token appears in ``text.words``; ``starts_with_[``
    tests the first character of the text.
    """
    if not isinstance(text, TextBlob):
        text = TextBlob(text.lower())
    # (feature name, token looked up in text.words) pairs
    keyword_tokens = (
        ('has_rumor', 'rumor'),
        ('has_gosip', 'gosip'),
        ('has_urbanesia', 'urbanesia'),
        ('has_batista', 'batista'),
        ('has_harahap', 'harahap'),
        ('has_pemasaran', 'pemasaran'),
        ('has_saham', 'saham'),
        ('has_hackathon', 'hackathon'),
        ('has_ipo', 'ipo'),
        ('has_akuisisi', 'akuisisi'),
        ('has_startup', 'startup'),
        ('has_android', 'android'),
        ('has_aplikasi', 'aplikasi'),
        ('has_payment', 'payment'),
        ('has_pembayaran', 'pembayaran'),
        ('has_api', 'api'),
        ('has_kompetisi', 'kompetisi'),
        ('has_ide', 'ide'),
        ('has_permainan', 'permainan'),
        ('has_game', 'game'),
        ('has_fundraising', 'fundraising'),
        ('has_askds', '[[email protected]]'),
        ('has_investasi', 'investasi'),
        ('has_musik', 'musik'),
        ('has_lagu', 'lagu'),
        ('has_bhinneka', 'bhinneka'),
        ('has_marketplace', 'marketplace'),
        ('has_mobile', 'mobile'),
        ('has_cto', 'cto'),
        ('has_traffic', 'traffic'),
    )
    features = {name: token in text.words for name, token in keyword_tokens}
    features['starts_with_['] = text[0] == '['
    return features
示例8: filter
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
# NOTE(review): this snippet starts mid-function -- `doc`, `ngram`, `field`
# and `dataset` are bound by enclosing code not shown here; the indentation
# below is a best-effort reconstruction.  Confirm against the full source.
# progress indicator every 1000 documents
if doc > 0 and doc % 1000 == 0:
    print '\t', doc
for kw in filter(lambda k: '_' in k, ngram):
    keyword = kw.replace('_', ' ')
    kw_tb = TextBlob(keyword)
    # filter out punctuation, etc (make sure that there are two non-punc words)
    if len(kw_tb.words) < 2:
        continue
    # add keywords which are all proper nouns
    distinct_tags = set(t[1] for t in kw_tb.tags)
    # NOTE(review): `{}` is an empty *dict*, never equal to a set, so this
    # condition is always False and the branch never fires; a subset test
    # (distinct_tags <= {'NNP', 'NNPS'}) was likely intended.
    if distinct_tags - {'NNP', 'NNPS'} == {}:
        dataset[doc][field].add(kw_tb.lower())
        continue
    # add noun phrases
    for np in kw_tb.lower().noun_phrases:
        dataset[doc][field].add(np)
# convert set into list for json serialization
for d in dataset:
    d[field] = list(d[field])
    # fix 's
    for i, np in enumerate(d[field]):
        if np.endswith(" 's"):
            # NOTE(review): rebinding the loop variable does not modify
            # d[field]; `d[field][i] = np[:-3]` was likely intended.
            np = np[:-3]
示例9: TextBlob
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
# Notebook-style demo: each bare expression below is a display cell.
# NOTE(review): `yelp_best_worst` is defined earlier in the notebook --
# presumably a pandas DataFrame with a `text` column; confirm.
# save it as a TextBlob object
review = TextBlob(yelp_best_worst.text[0])
# list the words
review.words
# list the sentences
review.sentences
# some string methods are available
review.lower()

# ## Part 6: Stemming and Lemmatization

# **Stemming:**
#
# - **What:** Reduce a word to its base/stem/root form
# - **Why:** Often makes sense to treat related words the same way
# - **Notes:**
#     - Uses a "simple" and fast rule-based approach
#     - Stemmed words are usually not shown to users (used for analysis/indexing)
#     - Some search engines treat words with the same stem as synonyms

# initialize stemmer (SnowballStemmer is imported from nltk earlier in the notebook)
stemmer = SnowballStemmer('english')
示例10: TextBlob
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
# Count noun-phrase lemma frequencies across all non-empty sources in a
# normalised QDA database (SQLAlchemy core).
# NOTE(review): `normmd`, `normcon`, `args`, `select` and `datetime` are
# defined by surrounding code not shown here; the snippet is also truncated
# at the final statement.  Confirm against the full script.
now = datetime.datetime.now()
normSource = normmd.tables.get('Source')
# fetch Id/Name/Content of every source that has content
sources = [dict(row) for row in normcon.execute(select([
        normSource.c.Id,
        normSource.c.Name,
        normSource.c.Content
    ]).where(
        normSource.c.Content.isnot(None)
    ))]
lemmafrequency = {}
for source in sources:
    if args.verbosity > 1:
        print "Reading source: " + source['Name']
    content = TextBlob(source['Content'])
    # lower-case before noun-phrase extraction, then lemmatize the phrases
    noun_phrases = content.lower().noun_phrases
    lemmas = noun_phrases.lemmatize()
    for lemma in lemmas:
        if lemma in lemmafrequency.keys():
            lemmafrequency[lemma] += 1
        else:
            lemmafrequency[lemma] = 1
    # optional --limit: stop after processing that many sources
    if args.limit > 0:
        args.limit -= 1
        if args.limit == 0:
            break
normNode = normmd.tables.get('Node')
normTagging = normmd.tables.get('Tagging')
# NOTE(review): statement truncated in this excerpt
nounPhraseNode = normcon.execute(select([
示例11: tweet_to_feat
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def tweet_to_feat(tweet, features):
    """Return, for each entry in *features*, how many times it occurs
    among the lemmatized, lower-cased tokens of *tweet*."""
    blob = TextBlob(tweet)
    #lang = blob.detect_language()
    lemmas = [token.lemma for token in blob.lower().tokenize()]
    return [lemmas.count(feature) for feature in features]
示例12: find_tweet
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
def find_tweet(tweet, place_list):
    """Return the tokens of *tweet* (lower-cased) that appear in *place_list*.

    BUG FIX: the original built `tweet_loc` but never returned it, so
    callers always received None.
    """
    t = TextBlob(unicode(tweet))
    tweet_loc = []
    for word in t.lower().tokenize():
        if word in place_list:
            # append in place instead of rebuilding the list each match
            tweet_loc.append(word)
    return tweet_loc
示例13: WordNetLemmatizer
# 需要导入模块: from textblob import TextBlob [as 别名]
# 或者: from textblob.TextBlob import lower [as 别名]
# Read lines from stdin and report language, word count, a spell-corrected
# version, proper nouns, lemmatized verbs, and a sentiment-based quip.
from __future__ import print_function
from textblob import TextBlob
from nltk.stem.wordnet import WordNetLemmatizer
import sys

lmtzr = WordNetLemmatizer()
for line in sys.stdin.readlines():
    blob = TextBlob(line.strip())
    # NOTE(review): detect_language() calls an external translation
    # service -- requires network access; confirm this is acceptable.
    sys.stdout.write("Detected language: {}\n".format(blob.detect_language()))
    sys.stdout.write("This message had {} words.\n".format(len(blob.words)))
    # lower-case first so correct() sees normalised text
    sys.stdout.write("Corrected sentence\n{}\n".format(blob.lower().correct()))
    # singular proper nouns (NNP) from the POS tags
    proper_nouns = [tag[0] for tag in blob.tags if tag[1] == 'NNP']
    # lemmatize anything tagged as a verb form (VB, VBD, VBG, ...)
    verbs = [lmtzr.lemmatize(tag[0], 'v') for tag in blob.tags if 'V' in tag[1]]
    sys.stdout.write("I found these proper nouns: {}\n".format(proper_nouns))
    sys.stdout.write("I found these verbs: {}\n".format(verbs))
    sentiment = blob.sentiment
    sys.stdout.write("Sentiment for that message: {}\n".format(sentiment))
    # only react strongly to highly subjective messages
    if sentiment.polarity > 0 and sentiment.subjectivity > 0.7:
        sys.stdout.write("That sounds amazing!\n")
    elif sentiment.polarity < 0 and sentiment.subjectivity > 0.7:
        sys.stdout.write("It'll get better.\n")
    else:
        sys.stdout.write("Meh.\n")
    sys.stdout.flush()