本文整理汇总了Python中textblob.TextBlob类的典型用法代码示例。如果您正苦于以下问题:Python TextBlob类的具体用法?Python TextBlob怎么用?Python TextBlob使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了TextBlob类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _translate_message
def _translate_message(bot, broadcast_list, context):
    """Append a translated copy of a relayed message for each target room.

    For every (conversation_id, response_segments) pair in *broadcast_list*,
    looks up the target room's language and, when it differs from the origin
    room's, appends "(translated text)" on a new line. Mutates the response
    segment lists in place; returns None.
    """
    if not context or "autotranslate" not in context:
        return
    _autotranslate = context["autotranslate"]
    origin_language = _get_room_language(bot, _autotranslate["conv_id"])
    for target_conversation_id, response in broadcast_list:
        target_language = _get_room_language(bot, target_conversation_id)
        if origin_language == target_language:
            continue
        logger.debug("translating {} to {}".format(origin_language, target_language))
        translated = _autotranslate["event_text"]
        try:
            en_blob = TextBlob(_autotranslate["event_text"])
            translated = "{0}".format(en_blob.translate(to=target_language))
        except Exception:
            # best-effort: the translation API raises on failure (e.g. text is
            # already in the target language); fall back to the original text
            logger.debug("Translation Api returned string unchanged")
        finally:
            if _autotranslate["event_text"] != translated:
                # mutate the original response by reference
                response.extend([
                    hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                    hangups.ChatMessageSegment('(' + translated + ')')])
示例2: check_speech_patterns
def check_speech_patterns(text):
    """Return True if any 2- to 5-gram of *text* has a POS-tag sequence that
    matches one of the known speech patterns, else False.

    Each n-gram is re-tagged in isolation and its tag tuple is looked up in
    the PATTERNS set.
    """
    PATTERNS = {
        ("PRP", "DT"),
        ("CC", "VBD"),
        ("VB", "RB"),
        ("VB", "PRP$"),
        ("NN", "POS"),
        ("NN", "MD", "VB"),
        ("VB", "PRP$", "NN"),
        ("MD", "VB", "VBN"),
        ("NN", "IN", "PRP$"),
        ("IN", "PRP$", "JJ"),
        ("VB", "PRP", "DT", "NN"),
        ("VBD", "RB", "JJ", "NNS"),
        ("NNP", "NNP", "NNP", "NNP"),
        ("PRP$", "NN", "CC", "PRP"),
        ("NNP", "NNP", "NNP", "NNP", "NNP"),
        ("NN", "IN", "DT", "NNS", "IN"),
        ("PRP$", "NN", "IN", "DT", "NN"),
        ("IN", "DT", "NN", "WDT", "VBZ"),
        ("NN", "IN", "PRP$", "JJ", "NN"),
        ("DT", "NN", "IN", "NN", "NN")
    }
    blob = TextBlob(text)
    for n in range(2, 6):
        for gram in blob.ngrams(n=n):
            gram_blob = TextBlob(" ".join(gram))
            tags = gram_blob.tags
            if not tags:
                # BUG FIX: taggers may drop tokens (e.g. pure punctuation);
                # the original zip(*tags) unpack raised on an empty tag list
                continue
            pos_tags = tuple(tag for _word, tag in tags)
            if pos_tags in PATTERNS:
                return True
    return False
示例3: scrape
def scrape(self,links=[],ads=True,translator=False):
responses = []
values = {}
data = []
if ads:
for link in links:
r = requests.get(link)
responses.append(r)
else:
for link in links:
r = requests.get(link)
text = unidecode(r.text)
html = lxml.html.fromstring(text)
links = html.xpath("//div[@class='cat']/a/@href")
for link in links:
if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
time.sleep(random.randint(5,27))
try:
responses.append(requests.get(link))
print link
except requests.exceptions.ConnectionError:
print "hitting connection error"
continue
for r in responses:
text = r.text
html = lxml.html.fromstring(text)
values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
values["link"] = unidecode(r.url)
values["new_keywords"] = []
try:
values["images"] = html.xpath("//img/@src")
except IndexError:
values["images"] = "weird index error"
pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")
values["text_body"] = pre_decode_text
try:
values["posted_at"] = html.xpath("//div[class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
except IndexError:
values["posted_at"] = "not given"
values["scraped_at"] = str(datetime.datetime.now())
body_blob = TextBlob(values["text_body"])
title_blob = TextBlob(values["title"])
values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
values["polarity"] = body_blob.polarity
values["subjectivity"] = body_blob.sentiment[1]
if values["language"] != "en" and not translator:
values["translated_body"] = body_blob.translate(from_lang="es")
values["translated_title"] = title_blob.translate(from_lang="es")
else:
values["translated_body"] = "none"
values["translated_title"] = "none"
text_body = values["text_body"]
title = values["title"]
values["phone_numbers"] = self.phone_number_parse(values)
data.append(values)
return data
示例4: findLanguage
def findLanguage(reducedList3):
languageMap = {}
currentNumber = 0
shuffle(reducedList3)
for i in reducedList3:
if currentNumber < 5000:
if len(i[0]) > 5:
try:
b = TextBlob(unicode(i[0]))
currentLanguage = b.detect_language()
if currentLanguage in languageMap:
languageMap[currentLanguage] += 1
else:
languageMap[currentLanguage] = 1
except:
pass
currentNumber += 1
print currentNumber
listOfWords = []
for i in languageMap:
for x in range(0, languageMap[i]):
listOfWords.append(i)
listOfWordsCounter = collections.Counter(listOfWords)
print 'Best Languages:', listOfWordsCounter.most_common(5)
print languageMap
示例5: extract
def extract(ngrams, dataset, doc_id):
# extract keywords
print 'Extracting keywords'
for i, ngram in enumerate(ngrams):
doc = doc_id[i]
if field not in dataset[doc]:
dataset[doc][field] = set()
if doc > 0 and doc % 1000 == 0:
print '\t', doc
for kw in filter(lambda k: '_' in k, ngram):
keyword = kw.replace('_', ' ')
kw_tb = TextBlob(keyword)
# filter out punctuation, etc (make sure that there are two non-punc words)
if len(kw_tb.words) < 2:
continue
# add keywords which are all proper nouns
distinct_tags = set(t[1] for t in kw_tb.tags)
if distinct_tags - {'NNP', 'NNPS'} == {}:
dataset[doc][field].add(kw_tb.lower())
continue
# add noun phrases
for np in kw_tb.lower().noun_phrases:
dataset[doc][field].add(np)
return kw_set_to_list(dataset)
示例6: tag_documents_text
def tag_documents_text(client):
    """Parse every document in cornell.documents with the perceptron tagger
    and store the result back on the record under 'parsed_perceptron'."""
    collection = client['cornell']['documents']
    for record in collection.find():
        parsed = TextBlob(record['text'], pos_tagger=PerceptronTagger()).parse()
        collection.update({'name':record['name']},{'$set':{'parsed_perceptron':parsed}})
示例7: update_book
def update_book(book):
    """Index *book* in the search index.

    English descriptions are reduced to their nouns, each noun longer than two
    characters followed by its Korean translation; other languages keep the
    description verbatim.
    """
    blob = TextBlob(book.description)
    if blob.detect_language() == 'en':
        pieces = []
        for noun, tag in blob.tags:
            if tag not in ('NN', 'NNP'):
                continue
            pieces.append(noun + " ")
            if len(noun) > 2:
                pieces.append(TextBlob(noun).translate(to='ko').string + " ")
        description = ''.join(pieces)
    else:
        description = book.description
    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )
    get_book_index().put(book_document)
示例8: process_status
def process_status(status, lang):
    """Normalize a tweet status dict: translate non-English text to English,
    attach sentiment, and return a flat dict of the fields of interest."""
    # translate to English unless the tweet already is
    if lang == 'en':
        text = status['text']
    else:
        try:
            text = str(TextBlob(status['text']).translate())
        except textblob.exceptions.NotTranslated:
            text = status['text']
    # sentiment analysis on the (possibly translated) text
    polarity, subjectivity = TextBlob(text).sentiment
    created_ms = 1000 * int(time.mktime((status['created_at']).timetuple()))
    return {
        "created_at": created_ms
        , "id_str": status['id_str']
        , "text": text
        , "sentiment": {"polarity": polarity, "subjectivity": subjectivity}
        , "retweet_count": status['retweet_count']
        , "in_reply_to_status_id_str": status['in_reply_to_status_id_str']
        , "geo": status['geo']
        , "retweeted": status['retweeted']
        , "in_reply_to_user_id_str": status['in_reply_to_user_id_str']
    }
示例9: matchRhyme
def matchRhyme(word1, word2):
    """Score how well two Hindi words rhyme via their English transliterations.

    Returns 5 when the last letters match and the second-to-last letters are
    the same vowel, 4 when the last letters match but the second-to-last
    letters differ, and 0 otherwise.

    BUG FIXES vs the original: ``.substring`` does not exist on TextBlob
    (AttributeError); the 4-score branch compared a string to itself (always
    False); ``rhymeMeter`` was unbound when the last characters differed.
    """
    # Append a marker word so the translator preserves trailing sounds, then
    # strip its 5-character transliteration afterwards.
    str1 = " " + word1 + "टेक्स्ट"
    str2 = " " + word2 + "टेक्स्ट"
    transliterated1 = str(TextBlob(str1).translate(from_lang="hi", to='en'))[:-5]
    transliterated2 = str(TextBlob(str2).translate(from_lang="hi", to='en'))[:-5]
    rhymeMeter = 0
    if (transliterated1 and transliterated2
            and transliterated1[-1] == transliterated2[-1]):
        # last characters match; now compare the "matra" (vowel) before them
        if len(transliterated1) > 1 and len(transliterated2) > 1:
            if (transliterated1[-2] == transliterated2[-2]
                    and transliterated1[-2] in ('a', 'e', 'i', 'o', 'u')):
                rhymeMeter = 5
            elif transliterated1[-2] != transliterated2[-2]:
                rhymeMeter = 4
    return rhymeMeter
示例10: _german
def _german(self, text):
    """Translate *text* to English, returning it unchanged on failure.

    BUG FIX: the bare ``except:`` also swallowed SystemExit and
    KeyboardInterrupt; narrowed to ``Exception``.
    """
    blob = TextBlob(text)
    try:
        return str(blob.translate(to="en"))
    except Exception:
        # deliberate best-effort: translation needs the network and raises
        # on already-English input
        return text
示例11: on_command
def on_command(self, msg, stdin, stdout, reply):
    """Handle a translate command: parse arguments, take the message from the
    arguments or from stdin, and print the translation to *stdout*.

    Says nothing on parse failure or translation failure.
    """
    # pre-process args
    # this might mess up if "from" or "to" is left out and
    # the message contains "from" or "to"
    self._push_character(msg["args"], "from", "-", 1)
    self._push_character(msg["args"], "to", "-", 1)
    try:
        args = self.parser.parse_args(msg["args"][1:])
    except (argparse.ArgumentError, SystemExit):
        return
    # get message from the appropriate place
    if args.message:
        message = " ".join(args.message)
    else:
        message = stdin.read().strip()
    # translate
    from_lang = args.from_language
    to_lang = args.to_language
    message = TextBlob(message)
    try:
        translated = message.translate(from_lang=from_lang, to=to_lang)
    except Exception:
        # BUG FIX: was a bare except; translation is best-effort — stay
        # silent on failure but don't swallow SystemExit/KeyboardInterrupt
        pass
    else:
        print(translated, file=stdout)
示例12: getKeywords
def getKeywords(text, useless):
    """Return a TextBlob of *text* with every word listed in *useless* removed.

    BUG FIXES vs the original: words were compared with ``is`` (identity —
    unreliable for strings, so matches were missed), and ``TextBlob.remove``
    does not exist, so any match that did occur raised AttributeError; the
    list was also being mutated while iterated.
    """
    blob = TextBlob(text)
    kept = [word for word in blob.words if word not in useless]
    return TextBlob(" ".join(kept))
示例13: gen_translate
def gen_translate(msg, fromlang=None, outputlang='en'):
    """Translate *msg* from *fromlang* to *outputlang*; return *msg* unchanged
    when the service reports it cannot be translated."""
    try:
        return str(TextBlob(msg).translate(from_lang=fromlang, to=outputlang))
    except NotTranslated:
        return msg
示例14: getEntities
def _record_entity(word, wordtag, xEntities):
    """Record *word* under *wordtag* in *xEntities* if its tag is a proper
    noun, noun, or personal pronoun and the word is not already present."""
    if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
        v = str(word).strip()
        if v not in xEntities:
            xEntities[v] = str(wordtag)

def getEntities(parser, tweet, xEntities):
    """Collect NNP/NN/PRP entities from *tweet* using three taggers (pattern,
    TextBlob, spaCy) into the *xEntities* dict (word -> tag) and return it.

    The triplicated filter/insert logic is factored into _record_entity.
    """
    try:
        spacyParsedObject = parser(tweet)
        sentence = TextBlob(tweet)
        textblobTaggedObject = sentence.parse().split()
        patterntaggedObject = tag(tweet, tokenize=True)
        for word, wordtag in patterntaggedObject:
            _record_entity(word, wordtag, xEntities)
        for taggedObject in textblobTaggedObject:
            for word in taggedObject:
                _record_entity(word[0], word[1], xEntities)
        for word in spacyParsedObject:
            _record_entity(word, word.tag_, xEntities)
        return xEntities
    except Exception as e:
        # NOTE(review): returns the exception object instead of raising —
        # preserved because callers appear to expect a value, but confirm
        # whether re-raising would be more appropriate
        return e
示例15: hi
def hi(bot, trigger):
    """Greet *trigger.nick* in a randomly chosen language and say it in chat."""
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar', 'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    # BUG FIX: the original lacked a space after the nick, producing e.g.
    # "Greetings dear Bobon the road of life"
    trans = TextBlob('Greetings dear ' + trigger.nick + ' on the road of life ')
    ind = randint(0, 12)  # 13 codes, indices 0..12
    trans = trans.translate(to=lang_codes[ind])
    bot.say(str(trans))