

Python TextBlob.ngrams Method Code Examples

This article collects typical usage examples of the Python method textblob.TextBlob.ngrams. If you are wondering what TextBlob.ngrams does, how to call it, or how it is used in practice, the curated examples below should help. You can also explore the other usage examples of the textblob.TextBlob class, where this method is defined.


Fifteen code examples of TextBlob.ngrams are shown below, ordered by popularity.
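Before the examples, a minimal sketch of the method itself (not from the collected projects): blob.ngrams(n) returns a list of WordList objects, each holding n consecutive word tokens, with punctuation stripped.

from textblob import TextBlob

blob = TextBlob("Now is better than never.")
for gram in blob.ngrams(n=3):  # each gram is a WordList of 3 tokens
    print(" ".join(gram))
# Now is better
# is better than
# better than never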

Example 1: check_speech_patterns

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
def check_speech_patterns(text):
	PATTERNS={
		("PRP","DT"),
		("CC","VBD"),
		("VB","RB"),
		("VB","PRP$"),
		("NN","POS"),
		("NN","MD","VB"),
		("VB","PRP$","NN"),
		("MD","VB","VBN"),
		("NN","IN","PRP$"),
		("IN","PRP$","JJ"),
		("VB","PRP","DT","NN"),
		("VBD","RB","JJ","NNS"),
		("NNP","NNP","NNP","NNP"),
		("PRP$","NN","CC","PRP"),
		("NNP", "NNP", "NNP", "NNP", "NNP"), 
		("NN", "IN", "DT", "NNS", "IN"),
		("PRP$", "NN", "IN", "DT", "NN"),
		("IN", "DT", "NN", "WDT", "VBZ"),
		("NN", "IN", "PRP$", "JJ", "NN"),
		("DT", "NN", "IN", "NN", "NN")
	}
	blob = TextBlob(text)
	for n in range(2, 6):
		for gram in blob.ngrams(n=n):
			# POS-tag the n-gram and look its tag sequence up in PATTERNS
			tags = TextBlob(" ".join(gram)).tags
			if tags and tuple(tag for _, tag in tags) in PATTERNS:
				return True
	return False
Author: code-11 | Project: BloombergSarcasm | Lines: 36 | Source: sentiment_analysis_3.py
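A leaner variant (a sketch, not from the original project): tag the whole text once and slide a window over the tag sequence, instead of re-running the POS tagger on every n-gram. Results can differ slightly, since tagging an isolated n-gram is not always identical to tagging it in context.

def check_speech_patterns_fast(text):
    # tag once, then test every window of length 2..5 against PATTERNS
    tags = [tag for _, tag in TextBlob(text).tags]
    for n in range(2, 6):
        for i in range(len(tags) - n + 1):
            if tuple(tags[i:i + n]) in PATTERNS:
                return True
    return False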

Example 2: sentiment_pattern

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import pickle (plus the project's Datapoint class)
def sentiment_pattern(text, gram_n=6):
	blob = TextBlob(text)
	ngrams = blob.ngrams(n=gram_n)
	sentiment_list = []
	datalist = []
	for gram in ngrams:
		str_gram = " ".join(gram)
		print(str_gram)
		# wrap each n-gram as a Datapoint for the pickled predictor
		data = (0, 0, str_gram, None)
		datalist.append(Datapoint(*data))

	with open("predictor.pickle", "rb") as f:
		predictor = pickle.load(f)
	prediction = predictor.predict(datalist)

	# collapse the predictor's 0-4 ratings to -1 / 0 / +1
	for sentiment in prediction:
		sentiment = int(sentiment)
		if sentiment < 2: sentiment_list.append(-1)
		if sentiment == 2: sentiment_list.append(0)
		if sentiment > 2: sentiment_list.append(1)

	print(sentiment_list)

	return sentiment_list
Author: code-11 | Project: BloombergSarcasm | Lines: 33 | Source: sentiment_analysis_2.py

Example 3: get_tupels

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
def get_tupels(text):
    lower = text.lower()
    blob = TextBlob(lower)
    ngrams = blob.ngrams(n=2)  # assumption: don't is two words (do n't), as in "do not"
                               # this can be easily changed by modifying the tokenizer
                               # http://stackoverflow.com/questions/30550411
    tuples = [tuple(ngram) for ngram in ngrams]
    return tuples
Author: Incognito | Project: py-ngrams-toy | Lines: 10 | Source: main.py
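On the tokenizer point in the comment above: TextBlob's default word tokenization splits contractions ("don't" becomes "do" / "n't"). One workaround, sketched here under the assumption that NLTK is installed, is to tokenize with a regexp that keeps contractions whole and build the bigrams by hand:

from nltk.tokenize import RegexpTokenizer

def bigrams_keep_contractions(text):
    # \w+(?:'\w+)? matches "don't" as a single token
    tokens = RegexpTokenizer(r"\w+(?:'\w+)?").tokenize(text.lower())
    return list(zip(tokens, tokens[1:]))

print(bigrams_keep_contractions("Don't stop me now"))
# [("don't", 'stop'), ('stop', 'me'), ('me', 'now')]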

Example 4: sentiment_reviews

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import math (plus the project's Datapoint class)
def sentiment_reviews(reviews, gram_n=5, predictor=None):
	datalist = []

	tag = []
	counttag = [0] * len(reviews)
	for (i, review) in enumerate(reviews):
		blob = TextBlob(review)
		# cap n at the review length so short reviews still produce an n-gram
		ngrams = blob.ngrams(n=min(gram_n, len(blob.words)))
		
		for gram in ngrams:
			str_gram=" ".join(gram)
			data = (0, 0, str_gram, None)
			datalist.append(Datapoint(*data))
			tag.append(i)
			counttag[i] += 1

	print("start prediction")

	prediction = predictor.predict(datalist)

	cstm = [[0] * 5 for _ in reviews]  # per-review histogram over the 5 rating classes
	for (i, sentiment) in enumerate(prediction):
		sentiment = int(sentiment)
		cstm[tag[i]][sentiment] += 1.0 / counttag[tag[i]]

	trating = 0.0
	tcount = 0.0

	for i in range(len(reviews)):
		if counttag[i] == 0:
			continue
			
		# empirical reweighting by review length (counttag[i] = number of n-grams)
		cstm[i][2] = cstm[i][2] / math.pow(counttag[i], 0.44)
		cstm[i][0] = cstm[i][0] * math.pow(counttag[i], 0.22)
		cstm[i][3] = cstm[i][3] * math.pow(counttag[i], 0.22)
		rating = 0.0
		count = 0.0
		for j in range(5):
			rating += (j + 1) * cstm[i][j]
			count += cstm[i][j]

		print(cstm[i], " ", counttag[i])

		t = 1 / (1 + math.exp(-(cstm[i][2] / count - 0.45) * 15))  # logistic weight: mostly-neutral reviews count less
		print(cstm[i][2] / count)
		trating += rating / count * (1 - t)
		tcount += 1 - t

	trating = trating / tcount
	if trating > 3:
		x = trating - 3
		x = math.pow(x, 0.4647) * 1.4492
		return x + 3
	else:
		x = 3 - trating
		x = math.pow(x, 0.4647) * 1.4492
		return 3 - x
Author: code-11 | Project: BloombergSarcasm | Lines: 59 | Source: truerating.py

Example 5: GetBigrams

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
def GetBigrams(text):
    blob = TextBlob(text)
    WordLists = blob.ngrams(n=2)
    Bigrams = []
    for wordlist in WordLists:
        # join the bigram with underscores, e.g. "do_not_"
        Bigrams.append("_".join(wordlist) + "_")
    return Bigrams
Author: smritisingh | Project: Review-Spam-Detection | Lines: 11 | Source: bigram.py

Example 6: build_ngrams

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import itertools, operator; from functools import reduce
def build_ngrams(text, language='en'):
    blob = TextBlob(lower(text, language))
    ngrams = [blob.ngrams(n=n) for n in (3, 2, 1)]
    wordlists = reduce(operator.add, ngrams)
    tokenized = (
        tokenize(wordlist, language, stem=True)
        for wordlist in wordlists)
    pure = (
        tokenize(wordlist, language, stem=False)
        for wordlist in wordlists)
    return itertools.chain(tokenized, pure)
Author: Atala | Project: arguman.org | Lines: 13 | Source: utils.py

Example 7: _get_detailed_stats

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: textstat and the project's TextFeature class
def _get_detailed_stats(no_code_text):
    """
    Returns detailed stats on text
    :param no_code_text: String to analyse
    :return: list of details
    """
    results = []
    group_by = 'Detailed Text Statistics'
    tb = TextBlob(no_code_text)
    # Spell check here...it's very slow
    results.append(TextFeature('Number of sentences', textstat.sentence_count(no_code_text), group_by))
    results.append(TextFeature('Number of sentences (again)', len(tb.sentences), group_by))
    results.append(TextFeature('Number of words', len(tb.words), group_by))
    results.append(TextFeature('Sentiment Polarity', tb.sentiment.polarity, group_by))
    results.append(TextFeature('Sentiment Subjectivity', tb.sentiment.subjectivity, group_by))
    results.append(TextFeature('Detected Language', tb.detect_language(), group_by))  # NOTE: detect_language() is deprecated/removed in recent textblob releases
    results.append(TextFeature('Number of important phrases', len(tb.noun_phrases), group_by))
    results.append(TextFeature('Number of word bi-grams', len(tb.ngrams(2)), group_by))
    results.append(TextFeature('Number of word tri-grams', len(tb.ngrams(3)), group_by))
    results.append(TextFeature('Number of word 4-grams', len(tb.ngrams(4)), group_by))
    return results
Author: AWegnerGitHub | Project: SE_Zephyr_VoteRequest_bot | Lines: 23 | Source: utils.py

Example 8: extract_trigrams

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: a pymongo client and the project's get_valid_trigrams helper
def extract_trigrams(client):
	documents = client['cornell']['documents']

	for doc in documents.find():
		blob = TextBlob(doc['text'])
		valid_trigrams = []
		for s in blob.sentences:
			sentence = TextBlob(s.dict['raw'])
			sentence = TextBlob(sentence.parse())
			trigrams = sentence.ngrams(n=3)
			valid_trigrams += get_valid_trigrams(trigrams)
		documents.update_one({'name': doc['name']}, {'$set': {'trigrams': valid_trigrams}})
Author: matheuscas | Project: fuzzy_opinion_mining | Lines: 14 | Source: model.py

Example 9: getNGrams

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
def getNGrams(text, n):
    blob = TextBlob(text)
    listofBlobs = blob.ngrams(n)
    listofBigrams = []
    for wordList in listofBlobs:
        # each n-gram is a WordList of str-like tokens; join with spaces
        listofBigrams.append(" ".join(wordList))
    return listofBigrams
Author: shubham192 | Project: ML-Project | Lines: 17 | Source: processor.py

Example 10: names_ext

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: the project's first_name_search and last_name_search helpers
def names_ext(sentence):
    ''' Extracts Names using first_name_search and last_name_search '''
    sentence = TextBlob(sentence)
    possible_names = sentence.noun_phrases
    print("NOUN PHRASES: ", possible_names)
    sentence = sentence.ngrams(n=2)
    names = []
    female_first = open('./Names_db/Females_Firsts.txt').read().strip().split("\n")
    male_first = open('./Names_db/Males_Firsts.txt').read().strip().split("\n")
    all_last = open('./Names_db/Last_Namess.txt').read().strip().split("\n")

    for phrases in sentence:
        # first token of the bigram as a given name, second as a surname
        female_names = first_name_search(phrases[0], female_first)
        male_names = first_name_search(phrases[0], male_first)
        last_names = last_name_search(phrases[1], all_last)
        if female_names and male_names and last_names != "None":
            print(female_names)
            print(male_names)
            print(last_names, "\n")
    return "None"
Author: SulavKhadka | Project: content_extract | Lines: 22 | Source: content_extract.py

Example 11: blogWords

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import re
    def blogWords(self):
        regex1 = '[^a-zA-Z0-9-/]'
        regex2 = '[^a-zA-Z0-9-\'\"/]'
        filename = 'blogwords.txt'
        i = 0
        textblob = TextBlob(" ".join(self.listOfWords))
        # load the blog-words text file; each line is one blog phrase
        blogWords_file = open(filename, 'r')
        for line in blogWords_file:
            # remove non-alphanumeric characters from the phrase
            line = re.sub(regex2, ' ', line)
            # lowercased words of the phrase
            lineArray = [x.lower() for x in line.split()]
            if not lineArray:
                continue  # skip blank lines, which would otherwise match at every position
            # compare each n-gram of the input text against the phrase
            for entry in textblob.ngrams(n=len(lineArray)):
                entry = [re.sub(regex1, '', x).lower() for x in entry]
                if lineArray == entry:
                    i += 1
        return i
Author: jkdll | Project: BlogGenderClassification | Lines: 22 | Source: features.py

Example 12: sentiment_pattern

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: the project's Datapoint class and a trained predictor
def sentiment_pattern(text, gram_n=6, predictor=None):
	blob = TextBlob(text)
	ngrams = blob.ngrams(n=gram_n)
	sentiment_list = []
	datalist = []
	for gram in ngrams:
		str_gram = " ".join(gram)
		data = (0, 0, str_gram, None)
		datalist.append(Datapoint(*data))

	prediction = predictor.predict(datalist)

	# collapse the predictor's 0-4 ratings to -1 / 0 / +1
	for sentiment in prediction:
		sentiment = int(sentiment)
		if sentiment < 2: sentiment_list.append(-1)
		if sentiment == 2: sentiment_list.append(0)
		if sentiment > 2: sentiment_list.append(1)

	return sentiment_list
Author: code-11 | Project: BloombergSarcasm | Lines: 23 | Source: sentiment_analysis_3.py

Example 13: index

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import os; import numpy as np; import pandas as pd
def index():
  response.content_type = 'text/text; charset=utf-8'
  
  ret =  'Hi there, I\'m process {0}!\n\n'.format(os.getpid())

  sentence = 'Now is better than never.'
  ret += 'Testing TextBlob ngram (n=3) with sentence: \n "{0}" \n'.format(sentence)
  blob = TextBlob(sentence)
  for word_list in blob.ngrams(n=3):
    ret += (' '.join(word_list) + '\n')
  
  data = pd.DataFrame({'A': np.random.randn(3), 'B': np.random.randn(3)})
  func = "pd.DataFrame({'A': np.random.randn(3), 'B': np.random.randn(3)})"
  ret += '\nTesting Numpy and Pandas with command: \n {0} \n{1} \n'.format(func, data.to_json())
    
  ret += '\nCode at: \n https://github.com/alyssaq/bottle-heroku-skeleton \n'
  ret += '\nEnvironment vars:\n'

  for k, v in env.items():
    if 'bottle.' in k:
      continue
    ret += '%s=%s\n' % (k, v)

  return ret
Author: alyssaq | Project: bottle-heroku-skeleton | Lines: 26 | Source: app.py

Example 14: on_success

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import re, operator
    def on_success(self, data):
        # Digest
        if 'text' in data:
            line = data['text']
            
            # Do n-grams
            blob = TextBlob(line)
            for ng in blob.ngrams(n=2):
                # normalize both words of the bigram (lowercase, stem,
                # strip punctuation, drop stop words) before counting it
                cleaned = []
                for word in ng:
                    word = word.lower()
                    if word in self.stemmer.stems:
                        word = self.stemmer.stems[word]
                    match = re.search(r'\w+', word)
                    if match:
                        word = match.group()
                    if word in self.stop_words:
                        word = ''
                    cleaned.append(word)
                if cleaned[0] and cleaned[1]:
                    key = ' '.join(cleaned)
                    self.bookshelf[key] = self.bookshelf.get(key, 0) + 1

            '''
            # Do Unigrams
            for word in line.split(' '):
                word = word.lower()

                # Stemming
                if word in self.stemmer.stems.keys():
                    word = self.stemmer.stems[word]

                # Removing punctuation
                match = re.search('\w+',word)
                if match:
                    word = match.group()

                # Stop words
                if word not in self.stop_words:
                    if word in self.bookshelf.keys():
                        self.bookshelf[word] += 1
                    else:
                        self.bookshelf[word] = 1
            '''

            self.count += 1

            # How often to update? Framerate will depend on the number of relevant 
            # tweets, so one size does not necessarily fit all          
            it = 5
            
            # Move forward
            if self.count % it == 0:
                sorted_words = sorted(self.bookshelf.items(), key=operator.itemgetter(1), reverse=True)
                
                clear()
                print('=== Update {} ==='.format(self.count))
                
                for i in range(min(20, len(sorted_words))):
                    print('{}): {} [{}]'.format(str(i+1), sorted_words[i][0], sorted_words[i][1]))
Author: mypolopony | Project: streamer | Lines: 63 | Source: stream.py
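A side note on the counting pattern above: collections.Counter removes the get-or-initialize bookkeeping entirely. A minimal self-contained sketch, with stand-in bigram keys:

from collections import Counter

bookshelf = Counter()
for key in ("do not", "do not", "stop me"):  # stand-in bigram keys
    bookshelf[key] += 1
print(bookshelf.most_common(1))  # [('do not', 2)]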

Example 15: tweet_content

# Required import: from textblob import TextBlob
# Alternatively: from textblob.TextBlob import ngrams
# Also requires: import random, requests, nltk
def tweet_content():
    """Generate tweet string (140 characters or less)
    """

#    with open('basho.txt', 'r') as content_file:
#        content = content_file.read()
    r = requests.get("http://novicevagabond.com/projects/haiku/basho.txt")
    content = r.text  # decoded text; r.content would be raw bytes
     
    nltk.data.path.append("nltk_data/")
    nltk.data.path.append("nltk_data/punkt")
    nltk.data.path.append("fizzle_dizzle/")
#    nltk.download()

#print content

    tokenizer = BlanklineTokenizer()
    cleaned_content = content.lower()
    corpus = TextBlob(cleaned_content,  tokenizer=tokenizer)

    haiku = corpus.sentences
#print haiku

    bigrams = corpus.ngrams(n=2)
    trigrams = corpus.ngrams(n=3)  # computed but not used below

#print bigrams
    transitions = {}  # first word -> {following word: count}
    for bigram in bigrams:
        k = bigram[0]
        v = bigram[1]
        if k in transitions:
            if v in transitions[k]:
                transitions[k][v] = transitions[k][v] + 1
            else:
                transitions[k][v] = 1
        else:
            transitions[k] = {v: 1}

# print(transitions)

    def weighted_choice(counts):
        # expand each candidate word `count` times, then pick uniformly
        choices = []
        for k in counts:
            choices.extend([k] * counts[k])
        return random.choice(choices)

    seed = random.choice(list(transitions.keys()))
    length = random.randint(11, 15)

    output = [seed]
#print output
    for i in range(length):
        output.append(weighted_choice(transitions[output[i]]))

    whitespace = " "
    line1 = whitespace.join(output[0:4])
    line2 = whitespace.join(output[4:9])
    line3 = whitespace.join(output[9:])
    line4 = "-- #markov_basho_haiku"
    sep = "\n"
    tweet = sep.join([line1, line2, line3, line4])
#    print tweet
    return tweet 
Author: nathanlubchenco | Project: tau | Lines: 71 | Source: tweet.py
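On weighted_choice: since Python 3.6, random.choices accepts per-item weights directly, so expanding one list entry per count is unnecessary. A sketch of an equivalent drop-in:

import random

def weighted_choice(counts):
    # same distribution as the expand-and-pick version above
    words = list(counts)
    weights = [counts[w] for w in words]
    return random.choices(words, weights=weights, k=1)[0]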


Note: The textblob.TextBlob.ngrams examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects by many contributors, and copyright remains with the original authors. Refer to each project's license before using or redistributing the code; do not republish without permission.