This article collects typical usage examples of the Python method textblob.TextBlob.detect_language. If you have been wondering what TextBlob.detect_language does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also read more about the class this method belongs to, textblob.TextBlob.
The following 15 code examples of TextBlob.detect_language are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
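Before the examples, here is a minimal sketch of the pattern most of them share: build a TextBlob, call detect_language(), and translate() when the text is not already English. As one of the examples below notes, detect_language() calls the Google Translate web API, so it needs a network connection and can fail at runtime; the snippet assumes a TextBlob release that still ships this method and wraps the calls accordingly.
# Minimal usage sketch (assumption: a TextBlob version that still provides
# detect_language/translate; both go out to the Google Translate web API,
# so they require network access and can raise exceptions on failure).
from textblob import TextBlob

blob = TextBlob(u"Bonjour tout le monde")
try:
    lang = blob.detect_language()       # e.g. 'fr'
    if lang != 'en':
        blob = blob.translate(to='en')  # -> "Hello everyone"
    print(lang, blob)
except Exception as exc:                # network or translation failure
    print("language detection failed:", exc)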
Example 1: split_sentence_based_on_verbs
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def split_sentence_based_on_verbs(reviewText):
    review_spacy = nlp(reviewText)
    review_textblob = TextBlob(reviewText)
    if not review_textblob.detect_language() == 'en':
        review_textblob = review_textblob.translate(to='en')
        review_spacy = nlp(review_textblob.string)
    else:
        contains_romanian_words = 0
        for word in review_textblob.words:
            word_textblob = TextBlob(word)
            if len(word_textblob.string) >= 3 and word_textblob.detect_language() == 'ro':
                contains_romanian_words = 1
                break
        if contains_romanian_words == 1:
            new_reviewText = ''
            for word in review_spacy:
                word_textblob = TextBlob(word.orth_)
                if not word.is_title and len(word_textblob.string) >= 3:
                    if word_textblob.detect_language() != 'ro':
                        new_reviewText = new_reviewText + ' ' + word_textblob.string
                    else:
                        new_word = word_textblob.translate(to='en')
                        new_reviewText = new_reviewText + ' ' + new_word.string
                else:
                    new_reviewText = new_reviewText + ' ' + word_textblob.string
                # only_english_words = 0
                # break
            review_textblob = TextBlob(new_reviewText)
            review_spacy = nlp(review_textblob.string)
    new_sentences = []
    verbs_positions = []
    for k in range(0, len(review_spacy)):
        if review_spacy[k].pos == VERB and review_spacy[k].dep_ == 'ROOT':
            verbs_positions.append(k)
    start = 0
    if len(verbs_positions) > 0:
        for p in range(0, len(verbs_positions)):
            if p == len(verbs_positions) - 1:
                new_sentences.append(review_spacy[start:len(review_spacy)].text)
            else:
                q = verbs_positions[p] + 1
                while q < len(review_spacy):
                    if review_spacy[q].is_stop and ((review_spacy[q].pos == CONJ and (q < len(review_spacy) - 1 and review_spacy[q - 1].pos != review_spacy[q + 1].pos)) or (review_spacy[q].pos == DET and review_spacy[q].lower_ in ['the', 'this', 'those', 'which', 'other', 'another']) or (review_spacy[q].pos == PUNCT and review_spacy[q] in [',', ';'])):
                        new_sentences.append(review_spacy[start:q].text)
                        start = q
                        break
                    q += 1
    else:
        new_sentences.append(reviewText)
    return new_sentences
Example 2: translate_msg
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def translate_msg(message):
    try:
        if (len(message.text) > 3):
            b = TextBlob(unicode(message.text))
            if (b.detect_language() == "ru"):
                tr_text = unicode(b.translate(to="en"))
                bot.send_message(message.chat.id, tr_text)
            if (b.detect_language() == "en"):
                tr_text = unicode(b.translate(to="ru"))
                bot.send_message(message.chat.id, tr_text)
    except Exception as e:
        print (e.message)
        bot.send_message(message.chat.id, "Sorry Boss,can't translate :("
                         " Try another message, please " +
                         telegram.Emoji.KISSING_FACE)
Example 3: findLanguage
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def findLanguage(reducedList3):
    languageMap = {}
    currentNumber = 0
    shuffle(reducedList3)
    for i in reducedList3:
        if currentNumber < 5000:
            if len(i[0]) > 5:
                try:
                    b = TextBlob(unicode(i[0]))
                    currentLanguage = b.detect_language()
                    if currentLanguage in languageMap:
                        languageMap[currentLanguage] += 1
                    else:
                        languageMap[currentLanguage] = 1
                except:
                    pass
        currentNumber += 1
        print currentNumber
    listOfWords = []
    for i in languageMap:
        for x in range(0, languageMap[i]):
            listOfWords.append(i)
    listOfWordsCounter = collections.Counter(listOfWords)
    print 'Best Languages:', listOfWordsCounter.most_common(5)
    print languageMap
Example 4: scrape
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def scrape(self, links=[], ads=True, translator=False):
    responses = []
    values = {}
    data = []
    if ads:
        for link in links:
            r = requests.get(link)
            responses.append(r)
    else:
        for link in links:
            r = requests.get(link)
            text = unidecode(r.text)
            html = lxml.html.fromstring(text)
            links = html.xpath("//div[@class='cat']/a/@href")
            for link in links:
                if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                    time.sleep(random.randint(5, 27))
                try:
                    responses.append(requests.get(link))
                    print link
                except requests.exceptions.ConnectionError:
                    print "hitting connection error"
                    continue
    for r in responses:
        text = r.text
        html = lxml.html.fromstring(text)
        values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
        values["link"] = unidecode(r.url)
        values["new_keywords"] = []
        try:
            values["images"] = html.xpath("//img/@src")
        except IndexError:
            values["images"] = "weird index error"
        pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n", "").replace("\r", "")
        values["text_body"] = pre_decode_text
        try:
            values["posted_at"] = html.xpath("//div[class='adInfo']")[0].text_content().replace("\n", " ").replace("\r", "")
        except IndexError:
            values["posted_at"] = "not given"
        values["scraped_at"] = str(datetime.datetime.now())
        body_blob = TextBlob(values["text_body"])
        title_blob = TextBlob(values["title"])
        values["language"] = body_blob.detect_language()  # requires the internet - makes use of google translate api
        values["polarity"] = body_blob.polarity
        values["subjectivity"] = body_blob.sentiment[1]
        if values["language"] != "en" and not translator:
            values["translated_body"] = body_blob.translate(from_lang="es")
            values["translated_title"] = title_blob.translate(from_lang="es")
        else:
            values["translated_body"] = "none"
            values["translated_title"] = "none"
        text_body = values["text_body"]
        title = values["title"]
        values["phone_numbers"] = self.phone_number_parse(values)
        data.append(values)
    return data
Example 5: update_book
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def update_book(book):
    blob = TextBlob(book.description)
    if blob.detect_language() == 'en':
        description = ''
        nouns = filter(lambda x: x[1] == 'NN' or x[1] == 'NNP', blob.tags)
        for noun, tag in nouns:
            description += noun + " "
            if len(noun) > 2:
                description += TextBlob(noun).translate(to='ko').string + " "
    else:
        description = book.description
    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )
    index = get_book_index()
    index.put(book_document)
Example 6: answer
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def answer(question):
    global IsAnswer, detected, u
    IsAnswer = True
    DetectLang = TextBlob(question)
    detected = DetectLang.detect_language()
    if detected == 'en':
        print("language detected: en")
        u = 'en'
        print(len(words), "len(words)")
        low = question.lower()
        questions = re.sub('[^\w]', ' ', low).split()  # list
        BadWords(questions)
        print(questions)

        def writeout(words, question, IsAnswer):
            r = []
            if len(words) > 3000:
                a1 = len(questions)
                for x in range(0, a1):
                    words.remove(random.choice(words))
                print(len(words), "len(words)")
            else:
                pass
            os.remove('newwords.txt')
            file = open('newwords.txt', 'w')
            words.extend(questions)
            r.extend(words)
            s = ' '.join(r)
            file.write(s)

        writeout(words, question, IsAnswer)
        randomthought()
    else:
        u = detected
        print("language detected:", u)
        randomthought()
Example 7: review_features_romanian
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def review_features_romanian(reviewText, type):
    review_spacy = nlp(reviewText)
    review_textblob = TextBlob(reviewText)
    review_spacy_ents = review_spacy.ents
    word_features_array = []
    # print(review_textblob)
    if not review_textblob.detect_language() == 'en':
        review_textblob = review_textblob.translate(to='en')
        review_spacy = nlp(review_textblob.string)
    else:
        contains_romanian_words = 0
        for word in review_textblob.words:
            word_textblob = TextBlob(word)
            if len(word_textblob.string) >= 3 and word_textblob.detect_language() == 'ro':
                contains_romanian_words = 1
                break
        if contains_romanian_words == 1:
            new_reviewText = ''
            for word in review_spacy:
                word_textblob = TextBlob(word.orth_)
                if not word.is_title and len(word_textblob.string) >= 3:
                    if word_textblob.detect_language() != 'ro':
                        new_reviewText = new_reviewText + ' ' + word_textblob.string
                    else:
                        new_word = word_textblob.translate(to='en')
                        new_reviewText = new_reviewText + ' ' + new_word.string
                else:
                    new_reviewText = new_reviewText + ' ' + word_textblob.string
            review_textblob = TextBlob(new_reviewText)
            review_spacy = nlp(review_textblob.string)
    # print(review_spacy)
    for i in range(len(review_spacy)):
        word = review_spacy[i]
        # if not word.is_stop and not word.is_punct:
        if (word.pos == NOUN or (word.pos == VERB and TextBlob(word.orth_).sentiment.polarity > 0) or word.pos == ADJ or word.pos == ADV) and not word.is_punct:
            # if word.pos == NOUN:
            if type == labelType.Label.aspect:
                word_features_array.append(word_aspect_features(review_spacy, review_textblob, review_spacy_ents, i))
            elif type == labelType.Label.attribute:
                word_features_array.append(word_attribute_features(review_spacy, review_textblob, review_spacy_ents, i))
            elif type == labelType.Label.polarity:
                word_features_array.append(word_polarity_features(review_spacy, review_textblob, review_spacy_ents, i))
            elif type == labelType.Label.emotion:
                word_features_array.append(word_emotion_features(review_spacy, review_textblob, review_spacy_ents, i))
    return word_features_array
Example 8: translate_this
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def translate_this(jenni, msg):
    t_msg = TextBlob(msg.groups()[0])
    from_lang = t_msg.detect_language()
    if from_lang != 'en':
        translated = t_msg.translate(from_lang=from_lang, to='en')
        jenni.reply("{}".format(translated))
    else:
        return
Example 9: scanForMultipleLanguages
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def scanForMultipleLanguages(target, words):
    langmap = makeLangPrefixMapping()
    langprefs = set()
    for word in words:
        blob = TextBlob(word)  # detect the language of each individual word
        detect = blob.detect_language()
        if detect != langmap[target]:
            langprefs.add(detect)
    return langprefs
Example 10: rating
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def rating(self):
    if self._rating:
        return self._rating
    elif len(self.text) > 3:
        blob = TextBlob(self.text)
        try:
            if blob.detect_language() == 'en':
                return round(min(max(blob.sentiment.polarity, -0.5), 0.5) * 4 + 3)
        except urllib.error.HTTPError:
            LOG.warning("Rating detection failed: HTTPError")
    return None
Example 11: find_loc
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def find_loc(p1):
    if p1 != "None":
        for city in cities_l:
            if city in p1.lower():
                return city
        for k, v in cities_nn.iteritems():
            if k in p1.lower():
                return v
        t1 = TextBlob(p1.lower())
        if "la" in p1.lower() and t1.detect_language() == "en":
            return "los angeles"
    return "None"
Example 12: getEngTag
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def getEngTag(self, tag):
    "Get the tag in English"
    tagName = TextBlob(tag.decode('utf-8'))
    tagName = tagName.words[0].singularize()
    if len(tagName) >= 3:
        lang = tagName.detect_language()
        if lang != 'en':
            tagName = tagName.translate(from_lang=lang, to='en')
    return tagName.encode('utf-8')
Example 13: handle
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def handle(request):
    page = request.match_info.get('page')
    content = yield from fetch_page(page)
    text = strip(content)
    blob = TextBlob(text.decode('utf-8'))
    words = list({w for w in blob.words if len(w) > 4})
    words.sort()
    body = {'sentences': len(blob.sentences),
            'words': len(words),
            'language': blob.detect_language(),
            'blob': words}
    return web.Response(body=json.dumps(body).encode('utf-8'),
                        content_type="application/json; charset=utf-8")
Example 14: echo
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def echo(word, word_eol, userdata):
    global my_language
    try:
        original = TextBlob(word_eol[3][1:].decode("utf-8"))
        lang = original.detect_language()
        nick = word[0].split("!")[0].replace(":", "")
        if lang != my_language:
            res = original.translate(from_lang=lang, to=my_language)
            if len(res) > 0:
                print("\037\00312" + nick + " said: " + str(res).replace(
                    "\n", "") + " (From lang=%s)" % str(lang))
        return hexchat.EAT_NONE
    except:
        return hexchat.EAT_NONE
Example 15: parse_text_meta_data
# Required import: from textblob import TextBlob [as alias]
# Alternatively: from textblob.TextBlob import detect_language [as alias]
def parse_text_meta_data(self, html, values):
    if self.debug:
        print "Processing textual information - language, polarity, subjectivity.."
    body_blob = TextBlob(values["text_body"])
    title_blob = TextBlob(values["title"])
    values["language"] = body_blob.detect_language()  # requires the internet - makes use of google translate api
    values["polarity"] = body_blob.polarity
    values["subjectivity"] = body_blob.sentiment[1]
    if values["language"] != "en" and not translator:
        values["translated_body"] = body_blob.translate(from_lang="es")
        values["translated_title"] = title_blob.translate(from_lang="es")
    else:
        values["translated_body"] = "none"
        values["translated_title"] = "none"
    return values