This article collects typical usage examples of the textblob.TextBlob.ngrams method in Python. If you are wondering what TextBlob.ngrams does, how to call it, or what it is useful for, the curated code samples below should help. You can also explore further examples for the containing class, textblob.TextBlob.
Below are 15 code examples of TextBlob.ngrams, sorted by popularity by default.
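Before the examples, a minimal sketch of the method itself (the sample sentence is made up): ngrams(n) splits the blob's tokens into overlapping runs of n consecutive words and returns a list of WordList objects.

from textblob import TextBlob

blob = TextBlob("Now is better than never.")
for gram in blob.ngrams(n=3):
    # each gram is a WordList, e.g. ['Now', 'is', 'better']
    print(" ".join(gram))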
Example 1: check_speech_patterns
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def check_speech_patterns(text):
    # POS-tag sequences (2- to 5-grams) that suggest conversational speech.
    PATTERNS = {
        ("PRP", "DT"),
        ("CC", "VBD"),
        ("VB", "RB"),
        ("VB", "PRP$"),
        ("NN", "POS"),
        ("NN", "MD", "VB"),
        ("VB", "PRP$", "NN"),
        ("MD", "VB", "VBN"),
        ("NN", "IN", "PRP$"),
        ("IN", "PRP$", "JJ"),
        ("VB", "PRP", "DT", "NN"),
        ("VBD", "RB", "JJ", "NNS"),
        ("NNP", "NNP", "NNP", "NNP"),
        ("PRP$", "NN", "CC", "PRP"),
        ("NNP", "NNP", "NNP", "NNP", "NNP"),
        ("NN", "IN", "DT", "NNS", "IN"),
        ("PRP$", "NN", "IN", "DT", "NN"),
        ("IN", "DT", "NN", "WDT", "VBZ"),
        ("NN", "IN", "PRP$", "JJ", "NN"),
        ("DT", "NN", "IN", "NN", "NN"),
    }
    blob = TextBlob(text)
    for i in range(2, 6):
        for gram in blob.ngrams(n=i):
            # Re-tag each n-gram in isolation and compare its tag tuple
            # against the known patterns.
            gram_blob = TextBlob(" ".join(gram))
            words, tags = zip(*gram_blob.tags)
            if tags in PATTERNS:
                return True
    return False
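A hypothetical call of the function above (the sentence is made up, and the result depends on TextBlob's tagger):

if check_speech_patterns("She quickly lost her new keys"):
    print("speech-like POS pattern found")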
Example 2: sentiment_pattern
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def sentiment_pattern(text, gram_n=6):
    # Also needs: import pickle. Datapoint and the pickled predictor come
    # from the surrounding project, not from textblob.
    blob = TextBlob(text)
    ngrams = blob.ngrams(n=gram_n)
    sentiment_list = []
    datalist = []
    for gram in ngrams:
        str_gram = " ".join(gram)
        print(str_gram)
        data = (0, 0, str_gram, None)
        datalist.append(Datapoint(*data))
        # Alternative using TextBlob's built-in polarity instead of the model:
        # sentiment = TextBlob(str_gram).sentiment[0]
        # sentiment_list.append(1 if sentiment > 0 else -1 if sentiment < 0 else 0)
    with open("predictor.pickle", "rb") as f:
        predictor = pickle.load(f)
    prediction = predictor.predict(datalist)
    for sentiment in prediction:
        # Map the model's five-class output (0-4) to negative/neutral/positive.
        sentiment = int(sentiment)
        if sentiment < 2: sentiment_list.append(-1)
        if sentiment == 2: sentiment_list.append(0)
        if sentiment > 2: sentiment_list.append(1)
    print(sentiment_list)
    return sentiment_list
Example 3: get_tupels
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def get_tupels(text):
    lower = text.lower()
    blob = TextBlob(lower)
    ngrams = blob.ngrams(n=2)  # assumption: don't is two words (do n't), as in "do not"
                               # this can be easily changed by modifying the tokenizer
                               # http://stackoverflow.com/questions/30550411
    tuples = [tuple(gram) for gram in ngrams]  # the original's double map(tuple, ...) was redundant
    return tuples
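A small usage sketch, assuming get_tupels is in scope (sample text made up). Because the n-grams come back as plain hashable tuples, they can be counted directly:

from collections import Counter

pairs = get_tupels("I don't know and I don't care.")
print(Counter(pairs).most_common(3))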
Example 4: sentiment_reviews
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def sentiment_reviews(reviews, gram_n=5, predictor=None):
    # Also needs: import math. Datapoint and predictor come from the surrounding project.
    datalist = []
    tag = []
    counttag = [0] * len(reviews)
    for (i, review) in enumerate(reviews):
        blob = TextBlob(review)
        ngrams = blob.ngrams(n=min(gram_n, len(blob.words)))
        for gram in ngrams:
            str_gram = " ".join(gram)
            data = (0, 0, str_gram, None)
            datalist.append(Datapoint(*data))
            tag.append(i)
            counttag[i] += 1
    print("start prediction")
    prediction = predictor.predict(datalist)
    # cstm[i][s]: fraction of review i's n-grams predicted as sentiment class s (0-4).
    cstm = [[0] * 5 for x in reviews]
    for (i, sentiment) in enumerate(prediction):
        sentiment = int(sentiment)
        cstm[tag[i]][sentiment] += 1.0 / counttag[tag[i]]
    trating = 0.0
    tcount = 0.0
    for i in range(len(reviews)):
        if counttag[i] == 0:
            continue
        # Empirically tuned re-weighting of class frequencies by review length.
        cstm[i][2] = cstm[i][2] / math.pow(counttag[i], 0.44)
        cstm[i][0] = cstm[i][0] * math.pow(counttag[i], 0.22)
        cstm[i][3] = cstm[i][3] * math.pow(counttag[i], 0.22)
        rating = 0.0
        count = 0.0
        for j in range(5):
            rating += (j + 1) * cstm[i][j]
            count += cstm[i][j]
        print(cstm[i], " ", counttag[i])
        # Logistic gate: down-weight reviews dominated by neutral n-grams.
        t = 1 / (1 + math.exp(-(cstm[i][2] / count - 0.45) * 15))
        print(cstm[i][2] / count)
        trating += rating / count * (1 - t)
        tcount += 1 - t
    trating = trating / tcount
    # Stretch the 1-5 average away from the neutral midpoint of 3.
    if trating > 3:
        x = math.pow(trating - 3, 0.4647) * 1.4492
        return x + 3
    else:
        x = math.pow(3 - trating, 0.4647) * 1.4492
        return 3 - x
Example 5: GetBigrams
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def GetBigrams(text):
    blob = TextBlob(text)
    WordLists = blob.ngrams(n=2)
    Bigrams = []
    for wordlist in WordLists:
        # Join the two tokens with underscores; note each string keeps a trailing "_".
        Bigrams.append("_".join(wordlist) + "_")
    return Bigrams
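A hypothetical call, showing the underscore-joined output format:

print(GetBigrams("the quick brown fox"))
# -> ['the_quick_', 'quick_brown_', 'brown_fox_']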
Example 6: build_ngrams
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def build_ngrams(text, language='en'):
    # lower() and tokenize() are helpers from the surrounding project.
    # Also needs: import itertools, operator; from functools import reduce
    blob = TextBlob(lower(text, language))
    ngrams = [blob.ngrams(n=n) for n in (3, 2, 1)]
    wordlists = reduce(operator.add, ngrams)
    tokenized = (
        tokenize(wordlist, language, stem=True)
        for wordlist in wordlists)
    pure = (
        tokenize(wordlist, language, stem=False)
        for wordlist in wordlists)
    return itertools.chain(tokenized, pure)
Example 7: _get_detailed_stats
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def _get_detailed_stats(no_code_text):
    """
    Returns detailed stats on text
    :param no_code_text: String to analyse
    :return: list of details
    """
    # textstat and TextFeature come from the surrounding project's imports.
    results = []
    group_by = 'Detailed Text Statistics'
    tb = TextBlob(no_code_text)
    # Spell check here...it's very slow
    results.append(TextFeature('Number of sentences', textstat.sentence_count(no_code_text), group_by))
    results.append(TextFeature('Number of sentences (again)', len(tb.sentences), group_by))
    results.append(TextFeature('Number of words', len(tb.words), group_by))
    results.append(TextFeature('Sentiment Polarity', tb.sentiment.polarity, group_by))
    results.append(TextFeature('Sentiment Subjectivity', tb.sentiment.subjectivity, group_by))
    # Note: detect_language() relied on an online service and is deprecated
    # (later removed) in newer TextBlob releases.
    results.append(TextFeature('Detected Language', tb.detect_language(), group_by))
    results.append(TextFeature('Number of important phrases', len(tb.noun_phrases), group_by))
    results.append(TextFeature('Number of word bi-grams', len(tb.ngrams(2)), group_by))
    results.append(TextFeature('Number of word tri-grams', len(tb.ngrams(3)), group_by))
    results.append(TextFeature('Number of word 4-grams', len(tb.ngrams(4)), group_by))
    return results
Example 8: extract_trigrams
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def extract_trigrams(client):
    # client is a pymongo MongoClient; get_valid_trigrams is a project helper.
    documents = client['cornell']['documents']
    for doc in documents.find():
        blob = TextBlob(doc['text'])
        valid_trigrams = []
        for s in blob.sentences:
            # Re-blob the raw sentence, run TextBlob's parser over it,
            # then take trigrams of the parsed (tagged) text.
            sentence = TextBlob(s.dict['raw'])
            sentence = TextBlob(sentence.parse())
            trigrams = sentence.ngrams(n=3)
            valid_trigrams = valid_trigrams + get_valid_trigrams(trigrams)
        # pymongo's update() is deprecated; update_one() is the modern equivalent.
        documents.update({'name': doc['name']}, {'$set': {'trigrams': valid_trigrams}})
Example 9: getNGrams
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def getNGrams(text, n):
    blob = TextBlob(text)
    listofBlobs = blob.ngrams(n)
    listofBigrams = []
    for wordList in listofBlobs:
        # Join the n tokens into one space-separated string
        # (the original built this with a flag-driven loop and unicode()).
        listofBigrams.append(" ".join(str(item) for item in wordList))
    return listofBigrams
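A hypothetical call, assuming getNGrams above is in scope:

print(getNGrams("a b c d", 2))
# -> ['a b', 'b c', 'c d']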
Example 10: names_ext
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def names_ext(sentence):
    ''' Extracts Names using first_name_search and last_name_search '''
    sentence = TextBlob(sentence)
    possible_names = sentence.noun_phrases
    print("NOUN PHRASES: ", possible_names)
    sentence = sentence.ngrams(n=2)
    names = []
    female_first = open('./Names_db/Females_Firsts.txt').read().strip().split("\n")
    male_first = open('./Names_db/Males_Firsts.txt').read().strip().split("\n")
    all_last = open('./Names_db/Last_Namess.txt').read().strip().split("\n")
    for phrases in sentence:
        # first_name_search / last_name_search are project helpers that appear
        # to return the string "None" when nothing matches.
        female_names = first_name_search(phrases[0], female_first)
        male_names = first_name_search(phrases[0], male_first)
        last_names = last_name_search(phrases[1], all_last)
        # The original condition compared only last_names against "None".
        if female_names != "None" and male_names != "None" and last_names != "None":
            print(female_names)
            print(male_names)
            print(last_names, "\n")
    return "None"
Example 11: blogWords
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def blogWords(self):
    regex1 = '[^a-zA-Z0-9-/]'
    regex2 = '[^a-zA-Z0-9-\'\"/]'
    filename = 'blogwords.txt'
    i = 0
    textblob = TextBlob(" ".join(self.listOfWords))
    # load blog words text file; each line represents a blog word
    with open(filename, 'r') as blogWords_file:
        for line in blogWords_file:
            # Remove non-alphanumeric characters in sequence
            line = re.sub(regex2, ' ', line)
            # array of words in line
            lineArray = [x.lower() for x in line.split()]
            # entry represents an n-gram instance of the input text
            for entry in textblob.ngrams(n=len(lineArray)):
                entry = [re.sub(regex1, '', x).lower() for x in entry]
                if lineArray == entry:
                    i += 1
    return i
Example 12: sentiment_pattern
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def sentiment_pattern(text, gram_n=6, predictor=None):
    # Same idea as Example 2, but the trained predictor is passed in
    # rather than unpickled inside the function.
    blob = TextBlob(text)
    ngrams = blob.ngrams(n=gram_n)
    sentiment_list = []
    datalist = []
    for gram in ngrams:
        str_gram = " ".join(gram)
        data = (0, 0, str_gram, None)
        datalist.append(Datapoint(*data))
    prediction = predictor.predict(datalist)
    for sentiment in prediction:
        sentiment = int(sentiment)
        if sentiment < 2: sentiment_list.append(-1)
        if sentiment == 2: sentiment_list.append(0)
        if sentiment > 2: sentiment_list.append(1)
    return sentiment_list
Example 13: index
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def index():
    # Bottle route handler; response, env, pd, np and os come from the module's imports.
    response.content_type = 'text/text; charset=utf-8'
    ret = 'Hi there, I\'m process {0}!\n\n'.format(os.getpid())
    sentence = 'Now is better than never.'
    ret += 'Testing TextBlob ngram (n=3) with sentence: \n "{0}" \n'.format(sentence)
    blob = TextBlob(sentence)
    for word_list in blob.ngrams(n=3):
        ret += (' '.join(word_list) + '\n')
    data = pd.DataFrame({'A': np.random.randn(3), 'B': np.random.randn(3)})
    func = "pd.DataFrame({'A': np.random.randn(3), 'B': np.random.randn(3)})"
    ret += '\nTesting Numpy and Pandas with command: \n {0} \n{1} \n'.format(func, data.to_json())
    ret += '\nCode at: \n https://github.com/alyssaq/bottle-heroku-skeleton \n'
    ret += '\nEnvironment vars:\n'
    for k, v in env.items():  # iteritems() in the original Python 2 code
        if 'bottle.' in k:
            continue
        ret += '%s=%s\n' % (k, v)
    return ret
Example 14: on_success
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def on_success(self, data):
    # Twython streaming callback, fired once per received tweet.
    # clear(), self.stemmer, self.stop_words and self.bookshelf come from the surrounding class.
    if 'text' in data:
        line = data['text']
        # Do n-grams
        blob = TextBlob(line)
        ngrams = list(blob.ngrams(n=2))
        for ng in ngrams:
            # Note: the cleaned-up word is never written back into ng, so the
            # stemming/stop-word logic below does not actually affect the
            # counts (behavior kept from the original).
            for word in list(ng):
                word = word.lower()
                if word in self.stemmer.stems:
                    word = self.stemmer.stems[word]
                match = re.search(r'\w+', word)
                if match:
                    word = match.group()
                if word in self.stop_words:
                    word = ''
            if ng[0] and ng[1]:
                key = ' '.join(ng)
                self.bookshelf[key] = self.bookshelf.get(key, 0) + 1
        '''
        # Do Unigrams
        for word in line.split(' '):
            word = word.lower()
            # Stemming
            if word in self.stemmer.stems:
                word = self.stemmer.stems[word]
            # Removing punctuation
            match = re.search(r'\w+', word)
            if match:
                word = match.group()
            # Stop words
            if word not in self.stop_words:
                if word in self.bookshelf:
                    self.bookshelf[word] += 1
                else:
                    self.bookshelf[word] = 1
        '''
        self.count += 1
    # How often to update? Framerate will depend on the number of relevant
    # tweets, so one size does not necessarily fit all
    it = 5
    # Move forward
    if self.count % it == 0:
        sorted_words = sorted(self.bookshelf.items(), key=operator.itemgetter(1), reverse=True)
        clear()
        print('=== Update {} ==='.format(self.count))  # the original format call had no placeholder
        for i in range(min(20, len(sorted_words))):  # guard against fewer than 20 entries
            print('{}): {} [{}]'.format(i + 1, sorted_words[i][0], sorted_words[i][1]))
Example 15: tweet_content
# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import ngrams [as alias]
def tweet_content():
    """Generate tweet string (140 characters or less)"""
    # Also needs: import random, requests, nltk;
    # from nltk.tokenize import BlanklineTokenizer
    # Local-file alternative:
    # with open('basho.txt', 'r') as content_file:
    #     content = content_file.read()
    r = requests.get("http://novicevagabond.com/projects/haiku/basho.txt")
    content = r.text  # .text (not .content) so we get str rather than bytes on Python 3
    nltk.data.path.append("nltk_data/")
    nltk.data.path.append("nltk_data/punkt")
    nltk.data.path.append("fizzle_dizzle/")
    # Haiku in the source file are separated by blank lines.
    tokenizer = BlanklineTokenizer()
    cleaned_content = content.lower()
    corpus = TextBlob(cleaned_content, tokenizer=tokenizer)
    haiku = corpus.sentences  # unused below
    bigrams = corpus.ngrams(n=2)
    trigrams = corpus.ngrams(n=3)  # computed but unused
    # Build a first-order Markov chain: dict[word] -> {follower: count}.
    # (The name `dict` shadows the builtin; kept from the original.)
    dict = {}
    for bigram in bigrams:
        k = bigram[0]
        v = bigram[1]
        if k in dict:
            if v in dict[k]:
                dict[k][v] = dict[k][v] + 1
            else:
                dict[k][v] = 1
        else:
            dict[k] = {v: 1}

    def weighted_choice(map):
        # Pick a follower with probability proportional to its count.
        choices = []
        for k in map:
            for n in range(1, map[k] + 1):
                choices.append(k)
        return random.choice(choices)

    seed = random.choice(list(dict.keys()))  # list() is required on Python 3
    length = random.randint(11, 15)
    output = [seed]
    for i in range(length):
        output.append(weighted_choice(dict[output[i]]))
    whitespace = " "
    line1 = whitespace.join(output[0:4])
    line2 = whitespace.join(output[4:9])
    line3 = whitespace.join(output[9:])
    line4 = "-- #markov_basho_haiku"
    sep = "\n"
    tweet = sep.join([line1, line2, line3, line4])
    return tweet