当前位置: 首页>>代码示例>>Python>>正文


Python TextBlob.detect_language方法代码示例

本文整理汇总了Python中textblob.TextBlob.detect_language方法的典型用法代码示例。如果您正苦于以下问题:Python TextBlob.detect_language方法的具体用法?Python TextBlob.detect_language怎么用?Python TextBlob.detect_language使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在textblob.TextBlob的用法示例。


在下文中一共展示了TextBlob.detect_language方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: split_sentence_based_on_verbs

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def split_sentence_based_on_verbs(reviewText):
    """Split a review into fragments around its ROOT verbs.

    The text is first normalised to English: if TextBlob detects a language
    other than 'en' the whole review is translated; otherwise, if any word of
    at least 3 characters is detected as 'ro', the review is rebuilt token by
    token with those words translated. The (possibly rewritten) text is then
    parsed with ``nlp`` (presumably a spaCy pipeline -- confirm) and cut into
    pieces.

    Args:
        reviewText: the raw review string.

    Returns:
        A list of strings. When no token is both a VERB and the dependency
        ROOT, the list holds only the original, untranslated ``reviewText``.
    """
    review_spacy = nlp(reviewText)
    review_textblob = TextBlob(reviewText)
    if not review_textblob.detect_language() == 'en':
        # Whole review is non-English: translate it and re-parse.
        review_textblob = review_textblob.translate(to='en')
        review_spacy = nlp(review_textblob.string)
    else:
        # Review is detected as English overall; look for stray Romanian words.
        contains_romanian_words = 0

        for word in review_textblob.words:
            word_textblob = TextBlob(word)
            if len(word_textblob.string) >= 3 and word_textblob.detect_language() == 'ro':
                contains_romanian_words = 1
                break

        if contains_romanian_words == 1:
            # Rebuild the text word by word, translating only the tokens that
            # are not title-cased, are at least 3 characters long and are
            # detected as Romanian. Every piece is prefixed with a space.
            new_reviewText = ''
            for word in review_spacy:
                word_textblob = TextBlob(word.orth_)
                if not word.is_title and len(word_textblob.string) >= 3:
                    if word_textblob.detect_language() != 'ro':
                        new_reviewText = new_reviewText + ' ' + word_textblob.string
                    else:
                        new_word = word_textblob.translate(to='en')
                        new_reviewText = new_reviewText + ' ' + new_word.string
                else :
                    new_reviewText = new_reviewText + ' ' + word_textblob.string
            review_textblob = TextBlob(new_reviewText)
            review_spacy = nlp(review_textblob.string)

    new_sentences = []
    # Indices of tokens that are both a verb and the root of their parse.
    verbs_positions = []
    for k in range(0, len(review_spacy)):
        if review_spacy[k].pos == VERB and review_spacy[k].dep_ == 'ROOT':
            verbs_positions.append(k)
    # Index of the first token that has not been emitted yet.
    start = 0
    if len(verbs_positions) > 0:
        for p in range(0, len(verbs_positions)):
            if p == len(verbs_positions) - 1:
                # Last root verb: everything still pending becomes one fragment.
                new_sentences.append(review_spacy[start:len(review_spacy)].text)
            else:
                # Scan forward from the token after this verb for a split
                # point; if none is found nothing is emitted for this verb and
                # the pending tokens are carried into a later fragment.
                q = verbs_positions[p] + 1
                while q < len(review_spacy):
                    # A split point is a stop word that is (a) a conjunction
                    # whose neighbours have different parts of speech, (b) one
                    # of the listed determiners, or (c) punctuation.
                    # NOTE(review): clause (c) tests a token object against the
                    # strings ',' and ';' -- this looks like it was meant to
                    # compare the token's text; confirm it can ever match.
                    if review_spacy[q].is_stop and ((review_spacy[q].pos == CONJ and (q < len(review_spacy)-1 and review_spacy[q-1].pos != review_spacy[q+1].pos)) or (review_spacy[q].pos == DET and review_spacy[q].lower_ in ['the', 'this', 'those', 'which', 'other', 'another']) or (review_spacy[q].pos == PUNCT and review_spacy[q] in [',', ';'])):
                        new_sentences.append(review_spacy[start:q].text)
                        start = q
                        break
                    q += 1
    else:
        # No root verb found: return the caller's original text unchanged.
        new_sentences.append(reviewText)
    return new_sentences
开发者ID:sarosicami,项目名称:ReviewerFlask,代码行数:55,代码来源:extract.py

示例2: translate_msg

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def translate_msg(message):
    """Reply to a chat message with its Russian <-> English translation.

    Messages of three characters or fewer are ignored. Text detected as
    Russian is answered in English, text detected as English is answered in
    Russian, and any other language gets no reply. If anything goes wrong an
    apology is sent to the chat instead.

    Args:
        message: incoming message; ``message.text`` and ``message.chat.id``
            are read.
    """
    # Detected language -> language to translate into.
    targets = {"ru": "en", "en": "ru"}
    try:
        if len(message.text) > 3:
            b = TextBlob(unicode(message.text))
            # Detect once and reuse the answer instead of repeating the
            # detection request for every branch.
            target = targets.get(b.detect_language())
            if target is not None:
                tr_text = unicode(b.translate(to=target))
                bot.send_message(message.chat.id, tr_text)
    except Exception as e:
        # Print the exception itself: not every exception has a ``message``
        # attribute, and failing here would suppress the apology below.
        print(e)
        bot.send_message(message.chat.id, "Sorry Boss,can't translate :("
                                          " Try another message, please " +
                                          telegram.Emoji.KISSING_FACE)
开发者ID:IlyaAshavskiy,项目名称:Girlfriend_bot,代码行数:17,代码来源:bot.py

示例3: findLanguage

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def findLanguage(reducedList3):
    """Print a language histogram for a random sample of the given entries.

    The list is shuffled in place, then at most the first 5000 entries are
    examined. For each entry whose first element is longer than 5 characters
    the language is detected and tallied; entries for which detection fails
    are skipped. A running counter is printed per examined entry, followed by
    the five most common languages and the full tally.

    Args:
        reducedList3: mutable sequence of indexable items; only ``item[0]``
            is read. It is shuffled in place.
    """
    languageMap = collections.Counter()

    shuffle(reducedList3)
    # Slicing caps the sample, so the loop stops once the limit is reached.
    for currentNumber, entry in enumerate(reducedList3[:5000], 1):
        if len(entry[0]) > 5:
            try:
                b = TextBlob(unicode(entry[0]))
                languageMap[b.detect_language()] += 1
            except Exception:
                # Best effort: a failed detection just leaves the entry out
                # of the tally (KeyboardInterrupt etc. still propagate).
                pass
        print(currentNumber)

    print('Best Languages: %s' % (languageMap.most_common(5),))

    print(dict(languageMap))
开发者ID:AbhiAgarwal,项目名称:classes,代码行数:31,代码来源:Agarwal-Homework3.py

示例4: scrape

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
    def scrape(self, links=None, ads=True, translator=False):
        """Download ad pages and extract one record of fields from each.

        Args:
            links: URLs to fetch. With ``ads`` true they are fetched directly
                as ad pages; otherwise each one is fetched as a listing page
                and the ``//div[@class='cat']/a/@href`` targets found on it
                are fetched as the ad pages.
            ads: whether ``links`` already point at individual ads.
            translator: translation of non-English ads happens only when this
                is false. NOTE(review): the flag reads inverted -- confirm
                the intended meaning with the callers.

        Returns:
            A list with one dict per fetched ad page, holding the keys
            ``title``, ``link``, ``new_keywords``, ``images``, ``text_body``,
            ``posted_at``, ``scraped_at``, ``language``, ``polarity``,
            ``subjectivity``, ``translated_body``, ``translated_title`` and
            ``phone_numbers``.
        """
        # A fresh list per call; a mutable default would be shared by calls.
        links = [] if links is None else links
        responses = []
        data = []

        if ads:
            for link in links:
                responses.append(requests.get(link))
        else:
            for link in links:
                listing = requests.get(link)
                listing_html = lxml.html.fromstring(unidecode(listing.text))

                # Separate names for the ad links, so the list being iterated
                # and its loop variable are not rebound mid-iteration.
                for ad_link in listing_html.xpath("//div[@class='cat']/a/@href"):
                    # Pause a random 5-27 s before the request whenever the
                    # base_urls condition holds.
                    if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                        time.sleep(random.randint(5,27))
                    try:
                        responses.append(requests.get(ad_link))
                        print(ad_link)
                    except requests.exceptions.ConnectionError:
                        print("hitting connection error")
                        continue

        for r in responses:
            html = lxml.html.fromstring(r.text)
            # One dict per ad. Sharing a single dict across iterations made
            # every entry of ``data`` alias the last ad scraped.
            values = {}
            values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
            values["link"] = unidecode(r.url)
            values["new_keywords"] = []
            values["images"] = html.xpath("//img/@src")
            values["text_body"] = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")
            try:
                # Attribute tests need '@'; without it the expression looks
                # for a child element named <class> and never matches.
                values["posted_at"] = html.xpath("//div[@class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
            except IndexError:
                values["posted_at"] = "not given"
            values["scraped_at"] = str(datetime.datetime.now())
            body_blob = TextBlob(values["text_body"])
            title_blob = TextBlob(values["title"])
            values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
            values["polarity"] = body_blob.polarity
            values["subjectivity"] = body_blob.sentiment[1]
            if values["language"] != "en" and not translator:
                # NOTE(review): the source language is hard-coded to Spanish
                # whatever language was detected above -- confirm.
                values["translated_body"] = body_blob.translate(from_lang="es")
                values["translated_title"] = title_blob.translate(from_lang="es")
            else:
                values["translated_body"] = "none"
                values["translated_title"] = "none"
            values["phone_numbers"] = self.phone_number_parse(values)
            data.append(values)

        return data
开发者ID:John-Keating,项目名称:investa_gator_v2,代码行数:62,代码来源:crawler.py

示例5: update_book

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def update_book(book):
    """Store a book's title, author and description in the book index.

    For a description detected as English, the indexed description is rebuilt
    from its nouns (tags ``NN`` and ``NNP``): each noun is followed by a
    space, and nouns longer than two characters are additionally followed by
    their Korean translation and a space. Any other description is indexed
    as it is.

    Args:
        book: object exposing ``ISBN``, ``title``, ``author`` and
            ``description``.
    """
    blob = TextBlob(book.description)

    if blob.detect_language() != 'en':
        description = book.description
    else:
        pieces = []
        for token, pos_tag in blob.tags:
            if pos_tag not in ('NN', 'NNP'):
                continue
            pieces.append(token + " ")
            if len(token) > 2:
                translated = TextBlob(token).translate(to='ko')
                pieces.append(translated.string + " ")
        description = ''.join(pieces)

    fields = [
        search.TextField(name='title', value=remove_punc(book.title)),
        search.TextField(name='author', value=remove_punc(book.author)),
        search.TextField(name='description', value=remove_punc(description)),
    ]
    book_document = search.Document(doc_id=book.ISBN, fields=fields)

    get_book_index().put(book_document)
开发者ID:ZeroPage,项目名称:zp-library-old,代码行数:29,代码来源:library_search.py

示例6: answer

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def answer(question):
    """Record the language of a question, learn its words, then respond.

    Sets the globals ``IsAnswer`` (to True), ``detected`` and ``u`` (both to
    the detected language code). For an English question the lower-cased
    words are passed through ``BadWords``, appended to the global ``words``
    list and the whole list is written to ``newwords.txt``. When ``words``
    already holds more than 3000 entries, as many random entries as the
    question has words are removed first. ``randomthought()`` is called in
    every case.

    Args:
        question: the question text.
    """
    global IsAnswer, detected, u
    IsAnswer = True
    detected = TextBlob(question).detect_language()
    u = detected
    if detected != 'en':
        print("language detected:", u)
        randomthought()
        return

    print("language detected: en")
    print(len(words), "len(words)")
    # Replace every non-word character with a space, then split into words.
    questions = re.sub(r'[^\w]', ' ', question.lower()).split()
    BadWords(questions)
    print(questions)

    if len(words) > 3000:
        # Make room by dropping one random word per incoming word.
        for _ in range(len(questions)):
            words.remove(random.choice(words))
        print(len(words), "len(words)")
    words.extend(questions)
    # Mode 'w' truncates an existing file and creates a missing one, so no
    # prior delete is needed; the context manager flushes and closes it.
    with open('newwords.txt', 'w') as out:
        out.write(' '.join(words))
    randomthought()
开发者ID:tinypirates,项目名称:minniebot-mk2,代码行数:36,代码来源:current.py

示例7: review_features_romanian

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def review_features_romanian(reviewText, type):
    """Build per-word feature records for a review, translating to English.

    A review not detected as English is translated as a whole. A review
    detected as English that still contains a word (3+ characters) detected
    as Romanian is rebuilt token by token with those words translated. Each
    noun, adjective, adverb or positive-polarity verb that is not punctuation
    then contributes one feature record, produced by the extractor matching
    ``type``.

    Args:
        reviewText: the raw review string.
        type: one of ``labelType.Label.aspect``, ``.attribute``, ``.polarity``
            or ``.emotion``; any other value yields no records.

    Returns:
        A list of whatever the selected ``word_*_features`` helper returns.
    """
    doc = nlp(reviewText)
    blob = TextBlob(reviewText)
    # Entities are taken from the parse of the original text and are not
    # refreshed after translation.
    entities = doc.ents
    features = []

    if blob.detect_language() != 'en':
        blob = blob.translate(to='en')
        doc = nlp(blob.string)
    else:
        has_romanian = False
        for raw_word in blob.words:
            candidate = TextBlob(raw_word)
            if len(candidate.string) < 3:
                continue
            if candidate.detect_language() == 'ro':
                has_romanian = True
                break

        if has_romanian:
            pieces = []
            for token in doc:
                token_blob = TextBlob(token.orth_)
                # Title-cased, short and non-Romanian tokens are kept as-is.
                keep_as_is = (
                    token.is_title
                    or len(token_blob.string) < 3
                    or token_blob.detect_language() != 'ro'
                )
                if keep_as_is:
                    pieces.append(token_blob.string)
                else:
                    pieces.append(token_blob.translate(to='en').string)
            # Every piece is prefixed with a single space.
            blob = TextBlob(''.join(' ' + piece for piece in pieces))
            doc = nlp(blob.string)

    for index, token in enumerate(doc):
        pos = token.pos
        is_content_word = pos in (NOUN, ADJ, ADV) or (
            pos == VERB and TextBlob(token.orth_).sentiment.polarity > 0
        )
        if not is_content_word or token.is_punct:
            continue

        if type == labelType.Label.aspect:
            extractor = word_aspect_features
        elif type == labelType.Label.attribute:
            extractor = word_attribute_features
        elif type == labelType.Label.polarity:
            extractor = word_polarity_features
        elif type == labelType.Label.emotion:
            extractor = word_emotion_features
        else:
            continue
        features.append(extractor(doc, blob, entities, index))
    return features
开发者ID:sarosicami,项目名称:ReviewerFlask,代码行数:49,代码来源:extract.py

示例8: translate_this

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def translate_this(jenni, msg):
    """Reply with the English translation of the first matched group.

    Nothing is sent when the text is already detected as English.

    Args:
        jenni: bot object; ``jenni.reply`` is called with the translation.
        msg: match-like object; ``msg.groups()[0]`` is the text to translate.
    """
    blob = TextBlob(msg.groups()[0])
    source_lang = blob.detect_language()
    if source_lang == 'en':
        return
    english = blob.translate(from_lang=source_lang, to='en')
    jenni.reply("{}".format(english))
开发者ID:nicklewis,项目名称:brittbot,代码行数:10,代码来源:test.py

示例9: scanForMultipleLanguages

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def scanForMultipleLanguages(target, words):
    """Collect the languages detected among ``words`` other than the target's.

    Args:
        target: key into the mapping returned by ``makeLangPrefixMapping()``;
            the mapped value is the expected language code.
        words: iterable of word strings to check one at a time.

    Returns:
        The set of detected language codes that differ from the expected one.
    """
    langmap = makeLangPrefixMapping()
    langprefs = set()
    for word in words:
        # Skip words under three characters, matching the minimum-length
        # guards used before detect_language() elsewhere.
        if len(word) < 3:
            continue
        # Detect the language of this word, not of the whole collection.
        detect = TextBlob(word).detect_language()
        # Compare by value: identity ('is not') on strings depends on
        # interning and is not a reliable equality test.
        if detect != langmap[target]:
            langprefs.add(detect)
    return langprefs
开发者ID:squidnee,项目名称:lingo-bean,代码行数:11,代码来源:giveawayFeatures.py

示例10: rating

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
 def rating(self):
     """Return the stored rating, or one derived from the text's sentiment.

     A truthy ``self._rating`` is returned unchanged. Otherwise, for text
     longer than 3 characters that is detected as English, the sentiment
     polarity is clamped to [-0.5, 0.5], scaled by 4, shifted by 3 and
     rounded. In every other case, including an HTTP error during
     detection, the result is None.
     """
     if self._rating:
         return self._rating
     if len(self.text) <= 3:
         return None

     blob = TextBlob(self.text)
     try:
         if blob.detect_language() != 'en':
             return None
         clamped = min(max(blob.sentiment.polarity, -0.5), 0.5)
         return round(clamped * 4 + 3)
     except urllib.error.HTTPError:
         LOG.warning("Rating detection failed: HTTPError")
         return None
开发者ID:SkyPicker,项目名称:StarPicker,代码行数:14,代码来源:reviews.py

示例11: find_loc

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def find_loc(p1):
    """Map a free-text location to a known city name.

    The lower-cased text is searched first for any entry of ``cities_l``,
    then for any key of ``cities_nn`` (whose value is returned). Failing
    both, text containing "la" that is detected as English maps to
    "los angeles".

    Args:
        p1: location text, or the string "None" when there is none.

    Returns:
        The matched city name, or the string "None" when nothing matches.
    """
    if p1 == "None":
        return "None"

    lowered = p1.lower()
    for known_city in cities_l:
        if known_city in lowered:
            return known_city
    for nickname, full_name in cities_nn.iteritems():
        if nickname in lowered:
            return full_name
    if "la" in lowered and TextBlob(lowered).detect_language() == "en":
        return "los angeles"
    return "None"
开发者ID:dmilad,项目名称:ChattyCity,代码行数:14,代码来源:clean_tweets.py

示例12: getEngTag

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
    def getEngTag(self, tag):
        """Return the tag's first word, singularized, in English, as UTF-8.

        The tag is decoded from UTF-8 and reduced to its first word in
        singular form. Words shorter than 3 characters, and words already
        detected as English, are returned untranslated.
        """
        first_word = TextBlob(tag.decode('utf-8')).words[0]
        singular = first_word.singularize()

        if len(singular) < 3:
            return singular.encode('utf-8')

        source_lang = singular.detect_language()
        if source_lang == 'en':
            return singular.encode('utf-8')

        english = singular.translate(from_lang=source_lang, to='en')
        return english.encode('utf-8')
开发者ID:PaulPidou,项目名称:Food-Clustering-Project,代码行数:14,代码来源:preprocess.py

示例13: handle

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def handle(request):
    """Serve JSON text statistics for the page named in the request.

    The page content is fetched, passed through ``strip``, decoded as UTF-8
    and analysed. The JSON body reports the sentence count, the number of
    distinct words longer than 4 characters, the detected language, and
    those words in sorted order.

    Args:
        request: request object; ``request.match_info.get('page')`` names
            the page to fetch.

    Returns:
        A ``web.Response`` with a UTF-8 encoded JSON body.
    """
    page_name = request.match_info.get('page')
    raw_content = yield from fetch_page(page_name)
    blob = TextBlob(strip(raw_content).decode('utf-8'))
    long_words = sorted({word for word in blob.words if len(word) > 4})
    payload = {
        'sentences': len(blob.sentences),
        'words': len(long_words),
        'language': blob.detect_language(),
        'blob': long_words,
    }
    encoded = json.dumps(payload).encode('utf-8')
    return web.Response(body=encoded,
                        content_type="application/json; charset=utf-8")
开发者ID:dnslj,项目名称:python-labs,代码行数:15,代码来源:web.py

示例14: echo

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
def echo(word, word_eol, userdata):
    """Print a translation of an incoming message into ``my_language``.

    The message text is ``word_eol[3]`` without its first character, decoded
    as UTF-8; the nick is the part of ``word[0]`` before "!" with colons
    removed. Messages already in ``my_language`` are left alone. Failures
    are ignored so that the event is never blocked.

    Args:
        word: event words; ``word[0]`` holds the sender prefix.
        word_eol: event words-to-end-of-line; ``word_eol[3]`` holds the text.
        userdata: unused.

    Returns:
        ``hexchat.EAT_NONE`` in every case.
    """
    try:
        original = TextBlob(word_eol[3][1:].decode("utf-8"))
        lang = original.detect_language()
        nick = word[0].split("!")[0].replace(":","")
        if lang == my_language:
            # Nothing to translate. Return explicitly rather than falling
            # through to an unbound ``res`` and relying on the handler below.
            return hexchat.EAT_NONE
        res = original.translate(from_lang=lang, to=my_language)
        if len(res) > 0:
            print("\037\00312" + nick + " said: " + str(res).replace( \
                  "\n","") + " (From lang=%s)" % str(lang))
    except Exception:
        # Deliberately best-effort: a failed detection or translation must
        # not disturb the chat. KeyboardInterrupt/SystemExit still propagate.
        pass
    return hexchat.EAT_NONE
开发者ID:l300lvl,项目名称:hexchat_addon,代码行数:16,代码来源:plugin.py

示例15: parse_text_meta_data

# 需要导入模块: from textblob import TextBlob [as 别名]
# 注意: detect_language 是 TextBlob 实例的方法, 不能单独导入, 需通过 TextBlob(...).detect_language() 调用
 def parse_text_meta_data(self,html,values):
     """Add language, sentiment and translation fields to ``values``.

     Reads ``values["text_body"]`` and ``values["title"]`` and writes
     ``language``, ``polarity``, ``subjectivity``, ``translated_body`` and
     ``translated_title`` into the same dict.

     Args:
         html: not used by this method's body.
         values: dict holding at least ``text_body`` and ``title``; it is
             modified in place.

     Returns:
         The same ``values`` dict.
     """
     if self.debug: print "Processing textual information - language, polarity, subjectivity.."
     body_blob = TextBlob(values["text_body"])
     title_blob = TextBlob(values["title"])
     values["language"] = body_blob.detect_language() # requires the internet - makes use of google translate api
     values["polarity"] = body_blob.polarity
     # Second element of the sentiment result.
     values["subjectivity"] = body_blob.sentiment[1]
     # NOTE(review): `translator` is neither a parameter nor an attribute of
     # self here; unless a module-level name exists this raises NameError for
     # every non-English body -- confirm where it is meant to come from.
     if values["language"] != "en" and not translator:
         # NOTE(review): the source language is hard-coded to Spanish whatever
         # language was detected above -- confirm.
         values["translated_body"] = body_blob.translate(from_lang="es")
         values["translated_title"] = title_blob.translate(from_lang="es")
     else:
         # The string "none" (not None) marks an untranslated record.
         values["translated_body"] = "none"
         values["translated_title"] = "none"
     return values
开发者ID:hackingagainstslavery,项目名称:investa_gator,代码行数:16,代码来源:crawler.py


注:本文中的textblob.TextBlob.detect_language方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。