This page collects typical usage examples of the textblob.tb function in Python. If you have been wondering how to use the Python tb function, how it is called in practice, or what real-world tb code looks like, the hand-picked examples below should help.
The following shows 15 code examples of the tb function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
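Most of the snippets below assume TextBlob has been imported under the short alias tb, typically via "from textblob import TextBlob as tb"; the individual examples usually omit their import lines. As a quick orientation, here is a minimal sketch of what a blob built with tb() exposes; the sample text is made up for illustration:

from textblob import TextBlob as tb

blob = tb("TextBlob turns plain strings into annotated text objects.")
print(blob.words)       # tokenized words (a WordList)
print(blob.sentences)   # Sentence objects
print(blob.tags)        # (word, part-of-speech tag) pairs
print(blob.sentiment)   # Sentiment(polarity=..., subjectivity=...)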
Example 1: parse_book
def parse_book(book_file):
    # chapter titles are all caps and only one word
    title_pattern = re.compile("^[A-Z]+$")
    book = []
    chapter = []
    i = 0
    with open(book_file, 'r') as f:
        for line in f:
            line = line.rstrip()
            if line:
                if title_pattern.match(line):
                    # if there's something in the chapter, put it in the book
                    if chapter:
                        i += 1
                        chapter = ' '.join(chapter)
                        book.append(tb(chapter))
                        chapter = []
                else:
                    # preprocess line and put into chapter
                    line = preprocess(line)
                    chapter.append(line)
    # put the last chapter in the book
    i += 1
    chapter = ' '.join(chapter)
    book.append(tb(chapter))
    return book
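The preprocess() helper used above is not part of the snippet. A hypothetical call might look like the following, where the input file name and the placeholder preprocess() are illustrative assumptions only:

def preprocess(line):
    # placeholder only; the real helper from the original project is not shown
    return line.lower()

book = parse_book('moby_dick.txt')   # hypothetical input file
print("chapters:", len(book))
print(book[0].words[:10])            # each chapter is a TextBlob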
Example 2: rankDocs
def rankDocs(keywordList, doclistTuples):
    scores = {}
    docList = [tb(doc[1].decode('utf-8')) for doc in doclistTuples]
    for doc in doclistTuples:
        scores[doc[0]] = scoreDoc(keywordList, tb(doc[1].decode('utf-8')), docList)
    sortedDocs = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    return sortedDocs[:10]
Example 3: setBlob
def setBlob(self, blob_):
    paragraph = filter(lambda x: x in printable, blob_)
    blob = tb(paragraph)
    newBlob = ""
    if self.stemming:
        for word in blob.words:
            newBlob += " " + stem(word.lower())
    self.blob = tb(newBlob)
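The names printable and stem are defined outside the snippet. A sketch of the setup it appears to rely on (an assumption, not shown in the original):

from string import printable          # used to drop non-printable characters
from stemming.porter2 import stem     # PyPI 'stemming' package; NLTK's PorterStemmer().stem would work as well

Note that under Python 3, filter() returns an iterator rather than a string, so the filtered characters would need to be joined (for example ''.join(...)) before being passed to tb().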
Example 4: main
def main():
    # Take in command-line args, and sort variables if necessary.
    parser = argparse.ArgumentParser(description='Analyze Blogs.', formatter_class=RawTextHelpFormatter)
    parser.add_argument('-b', '--blog', help='Manually enter the blog text here as a string. Formatted like:\n\nauthor: "authors name"\ntitle: "title"\nblog: "blog text"', default=None)
    parser.add_argument('-a', '--author', help='Enter the authors name as a string', default=None)
    parser.add_argument('-t', '--title', help='Enter the blogs title as a string', default=None)
    parser.add_argument('-i', '--inFile', help='Enter the path to a plain text file with the blog entry in it', default=None)
    args = parser.parse_args()
    # Save variables from command-line args.
    newBlogFile = args.inFile
    newBlogText = args.blog
    newBlogAuthor = args.author
    newBlogTitle = args.title
    go = True
    while go:
        # "features" is a dictionary of two dictionaries (good and bad features) and their relevant metadata.
        # "count" is the number of blogs that have been passed through, which is needed for updates.
        features = {"good": {"count": 0, "words": [], "names": 0.0, "religion": 0.0, "weaponry": 0.0, "government": 0.0},
                    "bad": {"count": 0, "words": [], "names": 0.0, "religion": 0.0, "weaponry": 0.0, "government": 0.0}}
        json_data = importJSON("Writings/writings.json")  # get JSON data, creating a dictionary-like object
        # Declare lists of writings.
        badBlogList = []
        goodBlogList = []
        # Analyze the current data in the JSON file.
        for blog in json_data["writings"]["bad"]:
            badBlogList.append(tb(blog["post"]))
        for blog in json_data["writings"]["good"]:
            goodBlogList.append(tb(blog["post"]))
        analysisResults = analyzeBlogs(badBlogList)
        features["bad"]["count"], features["bad"]["words"], features["bad"]["names"], features["bad"]["religion"], features["bad"]["weaponry"], features["bad"]["government"] = \
            len(badBlogList), analysisResults.outputsWordsArray, analysisResults.namesScore, analysisResults.religionScore, analysisResults.weaponryScore, analysisResults.governmentScore
        analysisResults = analyzeBlogs(goodBlogList)
        features["good"]["count"], features["good"]["words"], features["good"]["names"], features["good"]["religion"], features["good"]["weaponry"], features["good"]["government"] = \
            len(goodBlogList), analysisResults.outputsWordsArray, analysisResults.namesScore, analysisResults.religionScore, analysisResults.weaponryScore, analysisResults.governmentScore
        print("Current writings in database have been analyzed... \nRunning comparisons against provided writing...\n ----------------------------")
        newBlog = None
        # Analyze the new file.
        if newBlogFile is not None:
            newBlog = buildNewBlog(newBlogFile)
        elif newBlogText is not None:
            newBlog = buildNewBlog(None, newBlogAuthor, newBlogTitle, newBlogText)
        if newBlog is not None:
            tempFeatures = {"words": [], "names": 0.0, "religion": 0.0, "weaponry": 0.0, "government": 0.0}
            analyzeNewBlog(newBlog.post, goodBlogList, badBlogList, features)
        print("Please enter another file for analysis, or 'quit' to quit.\n")
        newBlogFile = input('File path: ')
        if newBlogFile in ("quit", "Quit", "q"):
            go = False
            print("Closing program...")
Example 5: readcontent
def readcontent(self):
    ope = open('Cs.txt', 'r')
    ope1 = open('Is.txt', 'r')
    ope2 = open('It.txt', 'r')
    self.CS_Fild = ope.read().lower()
    self.Is_filed = ope1.read().lower()
    self.IT_field = ope2.read().lower()
    self.Cs = tb(self.CS_Fild)
    self.Is = tb(self.Is_filed)
    self.It = tb(self.IT_field)
    self.bloblist = [self.Cs, self.Is, self.It]
Example 6: extract
def extract(text):
    bloblist = []
    with open("clean_text.csv") as f:
        reader = csv.DictReader(f)
        for row in reader:
            bloblist.append(tb(row['post_text']))
    blob = tb(text)
    scores = {word: tfidf(word, blob, bloblist) for word in blob.words}
    sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    words = ''
    for word, score in sorted_words[:15]:
        words += word + ' '
    return words
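The tfidf() helper used here (and again in Example 15) is not shown. A common TextBlob-based definition, following the widely used tf-idf tutorial pattern, looks roughly like this; the original implementation may differ:

import math

def tf(word, blob):
    # term frequency: the share of the blob's tokens that are this word
    return blob.words.count(word) / len(blob.words)

def n_containing(word, bloblist):
    # number of documents in the collection that contain the word
    return sum(1 for blob in bloblist if word in blob.words)

def idf(word, bloblist):
    # inverse document frequency, smoothed with +1 to avoid division by zero
    return math.log(len(bloblist) / (1 + n_containing(word, bloblist)))

def tfidf(word, blob, bloblist):
    return tf(word, blob) * idf(word, bloblist)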
Example 7: analyzeNewBlog
def analyzeNewBlog(blog, goodBlogList, badBlogList, features):
    # Get word densities of the new blog.
    namesCount, religionCount, weaponryCount, governmentCount, wordCount = 0, 0, 0, 0, 0
    for word in tb(blog):
        wordCount += 1
        if word in terms.governmentTerms():  # increment counts based on content to find word densities
            governmentCount += 1
        if word in terms.weaponsTerms():
            weaponryCount += 1
        if word in terms.femaleNames() or word in terms.maleNames():
            namesCount += 1
        if word in terms.religiousTerms():
            religionCount += 1
    analysisOutputs = AnalysisObject(namesCount/wordCount, religionCount/wordCount, weaponryCount/wordCount, governmentCount/wordCount, None)
    # Compare to the analyzed ones.
    scores = {"good": 0.0, "bad": 0.0}
    for upperKey in features:
        print("\nComparing this blog to " + upperKey.upper() + " blogs:\n")
        for lowerKey in features[upperKey]:
            if lowerKey == "words":
                for word in features[upperKey][lowerKey]:
                    if word[0] not in terms.stopWords():
                        if word[0] in blog:
                            print("Word found in " + upperKey + " blog: " + word[0])
                            scores[upperKey] += word[1] * 100  # If a word is found, update the score relative to its TF-IDF score.
            # The remaining branches compare the density of each term category in the new blog
            # with the density of that category in the analyzed blogs.
            elif lowerKey == "religion":
                scores[upperKey] -= abs(features[upperKey][lowerKey] - analysisOutputs.religionScore)
                print("Religion variance: " + str(features[upperKey][lowerKey] - analysisOutputs.religionScore))
            elif lowerKey == "government":
                scores[upperKey] -= abs(features[upperKey][lowerKey] - analysisOutputs.governmentScore)
                print("Government variance: " + str(abs(features[upperKey][lowerKey] - analysisOutputs.governmentScore)))
            elif lowerKey == "weaponry":
                scores[upperKey] -= abs(features[upperKey][lowerKey] - analysisOutputs.weaponryScore)
                print("Weaponry variance: " + str(abs(features[upperKey][lowerKey] - analysisOutputs.weaponryScore)))
            elif lowerKey == "names":
                scores[upperKey] -= abs(features[upperKey][lowerKey] - analysisOutputs.namesScore)
                print("Names variance: " + str(abs(features[upperKey][lowerKey] - analysisOutputs.namesScore)))
    print("\nFinal Scores:\n" + "Bad: " + str(scores["bad"]) + "\nGood: " + str(scores["good"]) + "\n")
    if abs(scores["good"] - scores["bad"]) < .5:
        print("This post does not trend towards 'good' or 'bad'.")
    else:
        if scores["good"] > scores["bad"]:
            print("This post has been marked as 'good'.")
            goodBlogList.append(tb(blog))  # Add the post to the blog list. If this program ran continuously, it would be included in the next base analysis.
        else:
            print("This post has been flagged as 'bad'.")
            badBlogList.append(tb(blog))
    print("\n---------------------------------------")
Example 8: get_tfidf_values
def get_tfidf_values(self, sentence):
    blob = tb(sentence)
    self.bloblist.append(blob)
    blob_list = self.bloblist[:]
    single_words = blob.words
    pairs = [Word(single_words[i] + ' ' + single_words[i + 1]) for i in range(len(single_words) - 1)]
    scores_pairs = {word: self.__tfidf__(word, blob, blob_list, 2) for word in pairs}
    sorted_words_pairs = sorted(scores_pairs.items(), key=lambda x: x[1], reverse=True)
    scores_single = {word: self.__tfidf__(word, blob, blob_list, 1) for word in blob.words}
    sorted_words_single = sorted(scores_single.items(), key=lambda x: x[1], reverse=True)
    ds = 0
    nmd = 0
    tec = 0
    for i, word in enumerate(sorted_words_single):
        ds += self.__ds_check__(word[0]) * word[1]
        nmd += self.__nmd_check__(word[0]) * word[1]
        tec += self.__tec_check__(word[0]) * word[1]
    for i, word in enumerate(sorted_words_pairs):
        ds += self.__ds_check__(word[0]) * word[1]
        nmd += self.__nmd_check__(word[0]) * word[1]
        tec += self.__tec_check__(word[0]) * word[1]
    return [ds, nmd, tec]
Example 9: features_pos_tag
def features_pos_tag(self):
    blob = tb('.'.join([self.title, self.short, self.need, self.essay]))
    counts = Counter(tag for word, tag in blob.tags)
    total = sum(counts.values())
    ratio_dict = tag_dict.copy()
    ratio_dict.update(dict((word, float(count) / total) for word, count in counts.items()))
    return tuple(map(lambda k: ratio_dict[k], tag_list))
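tag_dict and tag_list are defined elsewhere in the original module; presumably tag_list enumerates the part-of-speech tags of interest and tag_dict maps each of them to a default ratio of 0.0. A hypothetical minimal version, using a small subset of the Penn Treebank tags that blob.tags produces:

tag_list = ['NN', 'NNS', 'VB', 'VBD', 'JJ', 'RB', 'PRP', 'IN', 'DT']
tag_dict = {tag: 0.0 for tag in tag_list}

With those definitions, the function returns, for each tag in tag_list, the fraction of the document's tokens carrying that tag.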
Example 10: get_tweet_info
def get_tweet_info(tweet):
    processed_tweet = {
        'tweet_id': tweet.id_str,
        'created_by_id': tweet.user.id,
        'created_at': tweet.created_at,
        'text': tweet.text,
        'coordinates': tweet.coordinates,
        # Note: only returns a non-zero favorite_count for an original tweet.
        # We'd need to look up the original tweet itself to get the
        # favorite_count, which is possible.
        'favorite_count': tweet.favorite_count,
        'retweet_count': tweet.retweet_count
        # The favorited field only tells us whether we, the authenticated user,
        # have favorited this tweet, which isn't that helpful.
        # 'favorited': tweet.favorited,
    }
    if 'hashtags' in tweet.entities:
        processed_tweet['hashtags'] = tweet.entities['hashtags']
    else:
        processed_tweet['hashtags'] = None
    if 'media' in tweet.entities:
        processed_tweet['media'] = tweet.entities['media']
    else:
        processed_tweet['media'] = None
    # Get sentiment.
    blob = tb(tweet.text)
    sentiment = {'polarity': blob.sentiment.polarity,
                 'subjectivity': blob.sentiment.subjectivity}
    processed_tweet['sentiment'] = sentiment
    return processed_tweet
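The sentiment stored for each tweet comes from TextBlob's default analyzer, which returns a namedtuple with polarity in [-1.0, 1.0] and subjectivity in [0.0, 1.0]:

blob = tb("What a great day!")      # sample text for illustration
print(blob.sentiment)               # Sentiment(polarity=..., subjectivity=...)
print(blob.sentiment.polarity)      # > 0 for positive wording, < 0 for negative
print(blob.sentiment.subjectivity)  # 0.0 = objective, 1.0 = subjective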
Example 11: stemming
def stemming(doc):
    d = toker.tokenize(doc)
    d = [k for k in d if k not in cachedStopWords]
    for i in range(0, len(d)):
        d[i] = lemma.lemmatize(d[i])
    return tb(" ".join(d))
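toker, cachedStopWords and lemma are module-level objects that the snippet does not show. A plausible NLTK-based setup (an assumption, not the original code):

from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

toker = RegexpTokenizer(r'\w+')                    # any word tokenizer would do here
cachedStopWords = set(stopwords.words('english'))
lemma = WordNetLemmatizer()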
Example 12: __init__
def __init__(self, graph):
    self.bloblist = []
    for node in graph.nodes():
        try:
            self.bloblist.append(tb(graph.node[node]['abstract']))
        except:
            print "No abstract for node ", node
Example 13: buildTestData
def buildTestData(self):
    self.testBloblist = {}
    for key, value in self.dev.iteritems():
        content = '. '.join(self.dev[key]['content'])
        content = content.replace('..', '.')  # str.replace returns a new string, so the result must be reassigned
        self.testBloblist[key] = tb(content)
    self.testBloblistLength = len(self.testBloblist)
Example 14: extract
def extract(storyString):
    storyText = tb(storyString)
    results = []
    for sentence in storyText.sentences:  # split the text into sentences
        results.append(analyze_sent_semantics(sentence))
    return results
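analyze_sent_semantics() is defined elsewhere; what matters for tb() is that storyText.sentences yields Sentence objects, which support the same accessors as a full blob. A hypothetical stand-in for the helper:

def analyze_sent_semantics(sentence):
    # 'sentence' is a textblob Sentence, so tags, noun_phrases and sentiment are all available
    return (sentence.noun_phrases, sentence.sentiment.polarity)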
Example 15: main
def main():
    # Command-line args are in sys.argv[1], sys.argv[2], ...
    # sys.argv[0] is the script name itself and can be ignored.
    dataList = []
    for f in os.listdir('documents'):
        filePath = 'documents\\' + f
        fileName, fileExtension = os.path.splitext(filePath)
        if fileExtension.lower() == '.docx':
            # It's a Word document.
            doc = docxDocument(filePath)
            for p in doc.paragraphs:
                dataList.append(p.text)
        elif fileExtension.lower() == '.pdf':
            # It's a PDF document.
            # with open(filePath) as f:
            #     doc = slate.PDF(f)
            #     print doc[1]
            # TODO
            pass
        elif fileExtension.lower() == '.html' or fileExtension.lower() == '.htm':
            # It's an HTML file.
            with codecs.open(filePath, errors='ignore') as myfile:
                source = myfile.read()
            article = Document(source).summary()
            title = Document(source).title()
            soup = BeautifulSoup(article, 'lxml')
            final = replaceTwoOrMore(title.replace('\n', ' ').replace('\r', '') + '.' + soup.text.replace('\n', ' ').replace('\r', ''))
            dataList.append(final)
            # print '*** TITLE *** \n\"' + title + '\"\n'
            # print '*** CONTENT *** \n\"' + soup.text + '[...]\"'
        else:
            # Undetected document type.
            pass
    cachedStopWords = stopwords.words("english")
    combined = ' '.join(dataList)
    bloblist = [tb(combined)]
    for i, blob in enumerate(bloblist):
        print("Top words in document {}".format(i + 1))
        scores = {word: tfidf(word, blob, bloblist) for word in blob.words if word not in cachedStopWords}
        sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        for word, score in sorted_words:
            print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))