当前位置: 首页>>代码示例>>Python>>正文


Python textblob.TextBlob类代码示例

本文整理汇总了Python中textblob.TextBlob的典型用法代码示例。如果您正苦于以下问题:Python TextBlob类的具体用法?Python TextBlob怎么用?Python TextBlob使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了TextBlob类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _translate_message

def _translate_message(bot, broadcast_list, context):
    if context and "autotranslate" in context:
        _autotranslate = context["autotranslate"]
        origin_language = _get_room_language(bot, _autotranslate["conv_id"])
        for send in broadcast_list:
            target_conversation_id = send[0]
            response = send[1]
            target_language = _get_room_language(bot, target_conversation_id)
            if origin_language != target_language:
                logger.debug("translating {} to {}".format(origin_language, target_language))
                translated = _autotranslate["event_text"]
                try:
                    en_blob = TextBlob(_autotranslate["event_text"])
                    translated = "{0}".format(en_blob.translate(to=target_language))
                    #translated = gs.translate(_autotranslate["event_text"], target_language
                except Exception:
                    logger.debug("Translation Api returned string unchanged")
                else:
                    pass
                finally:
                    if _autotranslate["event_text"] != translated:
                    # mutate the original response by reference
                        response.extend([
                            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
                            hangups.ChatMessageSegment('(' + translated + ')')])
开发者ID:0xD3ADB33F,项目名称:hangoutsbot,代码行数:25,代码来源:_DEPRECATED_syncrooms_autotranslate.py

示例2: check_speech_patterns

def check_speech_patterns(text):
	PATTERNS={
		("PRP","DT"),
		("CC","VBD"),
		("VB","RB"),
		("VB","PRP$"),
		("NN","POS"),
		("NN","MD","VB"),
		("VB","PRP$","NN"),
		("MD","VB","VBN"),
		("NN","IN","PRP$"),
		("IN","PRP$","JJ"),
		("VB","PRP","DT","NN"),
		("VBD","RB","JJ","NNS"),
		("NNP","NNP","NNP","NNP"),
		("PRP$","NN","CC","PRP"),
		("NNP", "NNP", "NNP", "NNP", "NNP"), 
		("NN", "IN", "DT", "NNS", "IN"),
		("PRP$", "NN", "IN", "DT", "NN"),
		("IN", "DT", "NN", "WDT", "VBZ"),
		("NN", "IN", "PRP$", "JJ", "NN"),
		("DT", "NN", "IN", "NN", "NN")
	}
	blob= TextBlob(text)
	for i in range (2,6):
		ngrams=blob.ngrams(n=i)
		for gram in ngrams:
			str_gram=" ".join(gram)
			gram_blob=TextBlob(str_gram)
			tags=gram_blob.tags
			lst1, lst2 = zip(*tags)
			if lst2 in PATTERNS:
				return True
	return False
开发者ID:code-11,项目名称:BloombergSarcasm,代码行数:34,代码来源:sentiment_analysis_3.py

示例3: scrape

    def scrape(self,links=[],ads=True,translator=False):
        responses = []
        values = {}
        data = []
        
        if ads:
            for link in links:
                r = requests.get(link)
                responses.append(r)
        else:
            for link in links:
                r = requests.get(link)
                text = unidecode(r.text)
                html = lxml.html.fromstring(text)

                links = html.xpath("//div[@class='cat']/a/@href")
                for link in links:
                    if len(self.base_urls) > 1 or len(self.base_urls[0]) > 3:
                        time.sleep(random.randint(5,27))
                    try:
                        responses.append(requests.get(link))
                        print link
                    except requests.exceptions.ConnectionError:
                        print "hitting connection error"
                        continue

        for r in responses:
            text = r.text
            html = lxml.html.fromstring(text)
            values["title"] = html.xpath("//div[@id='postingTitle']/a/h1")[0].text_content()
            values["link"] = unidecode(r.url)
            values["new_keywords"] = []
            try:
                values["images"] = html.xpath("//img/@src")
            except IndexError:
                values["images"] = "weird index error"
            pre_decode_text = html.xpath("//div[@class='postingBody']")[0].text_content().replace("\n","").replace("\r","")  
            values["text_body"] = pre_decode_text 
            try:
                values["posted_at"] = html.xpath("//div[class='adInfo']")[0].text_content().replace("\n"," ").replace("\r","")
            except IndexError:
                values["posted_at"] = "not given"
            values["scraped_at"] = str(datetime.datetime.now())
            body_blob = TextBlob(values["text_body"])
            title_blob = TextBlob(values["title"])
            values["language"] = body_blob.detect_language() #requires the internet - makes use of google translate api
            values["polarity"] = body_blob.polarity
            values["subjectivity"] = body_blob.sentiment[1]
            if values["language"] != "en" and not translator:
                values["translated_body"] = body_blob.translate(from_lang="es")
                values["translated_title"] = title_blob.translate(from_lang="es")
            else:
                values["translated_body"] = "none"
                values["translated_title"] = "none"
            text_body = values["text_body"]
            title = values["title"]
            values["phone_numbers"] = self.phone_number_parse(values)
            data.append(values)
        
        return data
开发者ID:John-Keating,项目名称:investa_gator_v2,代码行数:60,代码来源:crawler.py

示例4: findLanguage

def findLanguage(reducedList3):
	languageMap = {}
	currentNumber = 0

	shuffle(reducedList3)
	for i in reducedList3:
		if currentNumber < 5000:
			if len(i[0]) > 5:
				try:
					b = TextBlob(unicode(i[0]))
					currentLanguage = b.detect_language()
					if currentLanguage in languageMap:
						languageMap[currentLanguage] += 1
					else:
						languageMap[currentLanguage] = 1
				except: 
					pass
			currentNumber += 1
			print currentNumber

	listOfWords = []
	for i in languageMap:
		for x in range(0, languageMap[i]):
			listOfWords.append(i)

	listOfWordsCounter = collections.Counter(listOfWords)
	print 'Best Languages:', listOfWordsCounter.most_common(5)

	print languageMap
开发者ID:AbhiAgarwal,项目名称:classes,代码行数:29,代码来源:Agarwal-Homework3.py

示例5: extract

def extract(ngrams, dataset, doc_id):
    # extract keywords
    print 'Extracting keywords'
    for i, ngram in enumerate(ngrams):
        doc = doc_id[i]

        if field not in dataset[doc]:
            dataset[doc][field] = set()

            if doc > 0 and doc % 1000 == 0:
                print '\t', doc

        for kw in filter(lambda k: '_' in k, ngram):
            keyword = kw.replace('_', ' ')

            kw_tb = TextBlob(keyword)

            # filter out punctuation, etc (make sure that there are two non-punc words)
            if len(kw_tb.words) < 2:
                continue

            # add keywords which are all proper nouns
            distinct_tags = set(t[1] for t in kw_tb.tags)
            if distinct_tags - {'NNP', 'NNPS'} == {}:
                dataset[doc][field].add(kw_tb.lower())
                continue

            # add noun phrases
            for np in kw_tb.lower().noun_phrases:
                dataset[doc][field].add(np)

    return kw_set_to_list(dataset)
开发者ID:MattL920,项目名称:nasaMining,代码行数:32,代码来源:extract.py

示例6: tag_documents_text

def tag_documents_text(client):

	documents = client['cornell']['documents']
	for doc in documents.find():
		blob = TextBlob(doc['text'], pos_tagger=PerceptronTagger())
		parsed_blob = blob.parse()
		documents.update({'name':doc['name']},{'$set':{'parsed_perceptron':parsed_blob}})
开发者ID:matheuscas,项目名称:fuzzy_opinion_mining,代码行数:7,代码来源:model.py

示例7: update_book

def update_book(book):
    blob = TextBlob(book.description)

    if blob.detect_language() == 'en':
        description = ''
        nouns = filter(lambda x: x[1] == 'NN' or x[1] == 'NNP', blob.tags)

        for noun, tag in nouns:
            description += noun + " "

            if len(noun) > 2:
                description += TextBlob(noun).translate(to='ko').string + " "

    else:
        description = book.description

    book_document = search.Document(
        doc_id=book.ISBN,
        fields=[
            search.TextField(name='title', value=remove_punc(book.title)),
            search.TextField(name='author', value=remove_punc(book.author)),
            search.TextField(name='description', value=remove_punc(description))
        ]
    )

    index = get_book_index()
    index.put(book_document)
开发者ID:ZeroPage,项目名称:zp-library-old,代码行数:27,代码来源:library_search.py

示例8: process_status

def process_status(status, lang):
    text = ""

    # translate
    if lang == 'en':
        text = status['text']
    else:
        blob = TextBlob(status['text'])
        try:
            text = str(blob.translate())
        except textblob.exceptions.NotTranslated:
            text = status['text']

    # sentiment analysis
    sentiment = TextBlob(text).sentiment

    return {
          "created_at": 1000 * int(time.mktime((status['created_at']).timetuple()))
        , "id_str": status['id_str']
        , "text": text
        , "sentiment": {"polarity": sentiment[0], "subjectivity": sentiment[1]}
        , "retweet_count": status['retweet_count']
        , "in_reply_to_status_id_str": status['in_reply_to_status_id_str']
        , "geo": status['geo']
        , "retweeted": status['retweeted']
        , "in_reply_to_user_id_str": status['in_reply_to_user_id_str']
            }
开发者ID:fourvvvv,项目名称:dt-twitter-network,代码行数:27,代码来源:status_wall.py

示例9: matchRhyme

def matchRhyme(word1,word2):
    #str1 = "tekst"
    #word1+="टेक्स्ट"
    str1 = ""
    str2 = ""

    word1+= "टेक्स्ट"
    word2+= "टेक्स्ट"

    str1 += " " + word1
    str2 += " " + word2

    hindi_blob1 = TextBlob(str1)
    hindi_blob2 = TextBlob(str2)

    transliteratedtxt1 = hindi_blob1.translate(from_lang="hi", to='en')
    transliteratedtxt1=transliteratedtxt1.substring[:-5]
    transliteratedtxt2 = hindi_blob2.translate(from_lang="hi", to='en')
    transliteratedtxt2= transliteratedtxt2.substring[:-5]

    word1Index= len(transliteratedtxt1)
    word2Index= len(transliteratedtxt2)
    ##Matcing last charater if they are same!!
    if (transliteratedtxt1[word1Index-1] == transliteratedtxt2[word2Index-1]):

        #rhymeMeter=3;
        ##Matching if second Last character is any of the Matras!!
        if ( ((transliteratedtxt1[word1Index-2]=='a') and (transliteratedtxt2[word2Index-2]=='a')) or ((transliteratedtxt1[word1Index-2]=='e') and (transliteratedtxt2[word2Index-2]=='e'))or ((transliteratedtxt1[word1Index-2]=='o') and (transliteratedtxt2[word2Index-2]=='o')) or ((transliteratedtxt1[word1Index-2]=='i') and (transliteratedtxt2[word2Index-2]=='i')) or ((transliteratedtxt1[word1Index-2]=='u') and (transliteratedtxt2[word2Index-2]=='u')) ):
            rhymeMeter=5
        else:
            if(transliteratedtxt1[word1Index-2]!=transliteratedtxt1[word1Index-2]):
                rhymeMeter=4
    return rhymeMeter
开发者ID:ManasMahanta,项目名称:Final_project,代码行数:33,代码来源:PhraseClassify.py

示例10: _german

    def _german(self, text):
        blob = TextBlob(text)

        try:
            return str(blob.translate(to="en"))
        except:
            return text
开发者ID:firecurious,项目名称:kokoro,代码行数:7,代码来源:translate.py

示例11: on_command

    def on_command(self, msg, stdin, stdout, reply):
        # pre-process args
        # this might mess up if "from" or "to" is left out and
        # the message contains "from" or "to"
        self._push_character(msg["args"], "from", "-", 1)
        self._push_character(msg["args"], "to",   "-", 1)

        try:
            args = self.parser.parse_args(msg["args"][1:])
        except (argparse.ArgumentError, SystemExit):
            return

        # get message from the appropriate place
        if args.message:
            message = " ".join(args.message)
        else:
            message = stdin.read().strip()

        # translate
        from_lang = args.from_language
        to_lang   = args.to_language
        message   = TextBlob(message)
        try:
            translated = message.translate(from_lang=from_lang, to=to_lang)
        except:
            pass
        else:
            print(translated, file=stdout)
开发者ID:bet0x,项目名称:smartbot,代码行数:28,代码来源:translate.py

示例12: getKeywords

def getKeywords(text, useless):
	text = TextBlob(text)
	for word in text.words:
		for bad in useless:
			if word is bad:
				text.remove(word)
	return text
开发者ID:anmousyon,项目名称:python,代码行数:7,代码来源:db_fill.py

示例13: gen_translate

def gen_translate(msg, fromlang=None, outputlang='en'):
    try:
        blob = TextBlob(msg)
        blob = blob.translate(from_lang=fromlang, to=outputlang)
        return str(blob)
    except NotTranslated:
        return msg
开发者ID:ubuntor,项目名称:cslbot,代码行数:7,代码来源:textutils.py

示例14: getEntities

def getEntities(parser, tweet, xEntities):
	try:
		spacyParsedObject = parser(tweet)
		sentence =  TextBlob(tweet)
		textblobTaggedObject = sentence.parse().split()
		patterntaggedObject = tag(tweet, tokenize=True)
		for word in patterntaggedObject:
			word, wordtag=word
			if  wordtag == "NNP" or  wordtag == "NN" or  wordtag == "PRP":
				v = str(word)
				v = v.strip()
				if(v not in xEntities):	
					xEntities[v]=str(wordtag)						
		for taggedObject in textblobTaggedObject:
			for word in taggedObject:
				word, wordtag=word[0], word[1]
				if wordtag == "NNP" or wordtag == "NN" or wordtag == "PRP":
					v = str(word)
					v = v.strip()
					if(v not in xEntities):	
						xEntities[v]=str(wordtag)
		for word in spacyParsedObject:
			if word.tag_ == "NNP" or word.tag_ == "NN" or word.tag_ == "PRP":
				v = str(word)
				v = v.strip()
				if(v not in xEntities):	
					xEntities[v]=str(word.tag_)
		return xEntities
	except Exception as e:
		return e
		
开发者ID:project-spinoza-dev,项目名称:tsakpy,代码行数:30,代码来源:getEntities.py

示例15: hi

def hi(bot, trigger):
    lang_codes = ['af', 'ga', 'sq', 'it', 'ar',	'ja', 'az', 'kn', 'eu', 'ko', 'bn', 'la', 'en']
    trans = TextBlob('Greetings dear '+trigger.nick+'on the road of life ')
    ind = randint(0, 12)
    trans = trans.translate(to=lang_codes[ind])
    saying = str(trans)
    bot.say(saying)
开发者ID:Amanda-Clark,项目名称:IRC_Bot_Code,代码行数:7,代码来源:Amanda.py


注:本文中的textblob.TextBlob类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。