当前位置: 首页>>代码示例>>Python>>正文


Python textblob.tb函数代码示例

本文整理汇总了Python中textblob.tb函数的典型用法代码示例(tb通常是TextBlob类的导入别名,即 from textblob import TextBlob as tb)。如果您正苦于以下问题:Python tb函数的具体用法?Python tb怎么用?Python tb使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了tb函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parse_book

def parse_book(book_file):
    """Split a plain-text book into one blob per chapter.

    A line made up solely of upper-case ASCII letters (a single word) is
    treated as a chapter title and closes the chapter collected so far.
    Blank lines are skipped; every other line is run through ``preprocess``
    and the lines of a chapter are joined with single spaces.

    Args:
        book_file: Path of the text file to read.

    Returns:
        A list with one ``tb(...)`` object per non-empty chapter, in file
        order.
    """
    # chapter titles are all caps and only one word
    title_pattern = re.compile("^[A-Z]+$")
    book = []
    chapter = []
    with open(book_file, 'r') as f:
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            if title_pattern.match(line):
                # A title ends the current chapter; store it only if it has
                # content (e.g. nothing precedes the very first title).
                if chapter:
                    book.append(tb(' '.join(chapter)))
                chapter = []
            else:
                chapter.append(preprocess(line))
    # Store the last chapter too, with the same non-empty guard: an empty
    # file or a file ending on a title must not produce an empty blob.
    if chapter:
        book.append(tb(' '.join(chapter)))
    return book
开发者ID:genbien,项目名称:GoT,代码行数:26,代码来源:most_common_words_in_chapters_tfidf.py

示例2: rankDocs

def rankDocs(keywordList, doclistTuples):
    """Score every document against the keywords and return the ten best.

    Args:
        keywordList: Keywords, handed to ``scoreDoc`` unchanged.
        doclistTuples: Sequence of items whose element 0 is the document key
            and whose element 1 is the UTF-8 encoded document text.

    Returns:
        Up to ten ``(key, score)`` pairs ordered by descending score.
    """
    # Decode and parse each document exactly once; the same blob serves both
    # as the document being scored and as a member of the corpus.
    docList = [tb(doc[1].decode('utf-8')) for doc in doclistTuples]
    scores = {}
    for doc, blob in zip(doclistTuples, docList):
        scores[doc[0]] = scoreDoc(keywordList, blob, docList)

    sortedDocs = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    return sortedDocs[:10]
开发者ID:kenumovies,项目名称:question-answer-system,代码行数:8,代码来源:tfidf_scores.py

示例3: setBlob

 def setBlob(self,blob_):
     """Store a cleaned (and, when enabled, stemmed) blob of *blob_* in ``self.blob``.

     Characters that are not contained in ``printable`` are dropped first
     (presumably ``string.printable`` -- confirm against the file's imports).
     When ``self.stemming`` is truthy every word is lower-cased and passed
     through ``stem``; otherwise the cleaned text is stored as is.

     Args:
         blob_: The raw text to store.
     """
     # join() over a generator returns a string on Python 2 and Python 3;
     # filter() on a string only does so on Python 2.
     paragraph = "".join(ch for ch in blob_ if ch in printable)
     if self.stemming:
         # Every stem is preceded by one space, so the text starts with a space.
         paragraph = "".join(" " + stem(word.lower()) for word in tb(paragraph).words)
     # Without stemming the cleaned text itself is kept instead of an empty blob.
     self.blob = tb(paragraph)
开发者ID:luciencd,项目名称:suggestr,代码行数:9,代码来源:courseSimilarity.py

示例4: main

def main():
    """Command-line entry point: compare blog posts with the stored writings.

    Parses the ``--blog``/``--author``/``--title``/``--inFile`` options and
    then loops: load ``Writings/writings.json``, analyse its "bad" and "good"
    writings, compare the supplied blog (if any) against them, and prompt for
    another file path until the user enters ``quit``, ``Quit`` or ``q``.
    """
    # Takes in commandLine args.
    parser = argparse.ArgumentParser(description='Analyze Blogs.', formatter_class=RawTextHelpFormatter)
    parser.add_argument('-b', '--blog', help='Manually enter the blog text here as a string. Formatted like:\n\nauthor: "authors name"\ntitle: "title"\nblog: "blog text"', default=None)
    parser.add_argument('-a', '--author', help='Enter the authors name as a string', default=None)
    parser.add_argument('-t', '--title', help='Enter the blogs title as a string', default=None)
    parser.add_argument('-i', '--inFile', help='Enter the path to a plain text file with the blog entry in it', default=None)
    args = parser.parse_args()

    # Save variables from commandline args
    newBlogFile = args.inFile
    newBlogText = args.blog
    newBlogAuthor = args.author
    newBlogTitle = args.title

    def summarize(blogList):
        """Analyse *blogList* and return its feature dictionary."""
        analysisResults = analyzeBlogs(blogList)
        # count is the number of blogs that were analysed.
        return {
            "count": len(blogList),
            "words": analysisResults.outputsWordsArray,
            "names": analysisResults.namesScore,
            "religion": analysisResults.religionScore,
            "weaponry": analysisResults.weaponryScore,
            "government": analysisResults.governmentScore,
        }

    while True:
        json_data = importJSON("Writings/writings.json") # get JSON data, creating a dictionary-like object

        # Analyze the current data in the JSON file.
        badBlogList = [tb(blog["post"]) for blog in json_data["writings"]["bad"]]
        goodBlogList = [tb(blog["post"]) for blog in json_data["writings"]["good"]]

        # "bad" is analysed before "good", while "good" stays the first key of
        # the features dictionary that is handed to analyzeNewBlog.
        badFeatures = summarize(badBlogList)
        goodFeatures = summarize(goodBlogList)
        features = {"good": goodFeatures, "bad": badFeatures}

        print("Current writings in database have been analyzed... \nRunning comparisons against provided writing...\n ----------------------------")

        # Build the new blog from the file if one was given, else from the text.
        newBlog = None
        if newBlogFile is not None:
            newBlog = buildNewBlog(newBlogFile)
        elif newBlogText is not None:
            newBlog = buildNewBlog(None, newBlogAuthor, newBlogTitle, newBlogText)

        if newBlog is not None:
            analyzeNewBlog(newBlog.post, goodBlogList, badBlogList, features)

        print ("Please enter another file for analysis. or 'quit' to quit.\n")
        newBlogFile = input('File path: ')
        if newBlogFile in ("quit", "Quit", "q"):
            break

    print("Closing program...")
开发者ID:dfrank8,项目名称:NaturalLanguageProcessor,代码行数:55,代码来源:NaturalLanguage.py

示例5: readcontent

 def readcontent(self):
     """Read the three field files and build one blob per file.

     Reads ``Cs.txt``, ``Is.txt`` and ``It.txt`` from the current working
     directory, lower-cases their contents and stores both the raw text
     (``CS_Fild``, ``Is_filed``, ``IT_field``) and the blobs (``Cs``, ``Is``,
     ``It``, plus ``bloblist`` holding the three blobs in that order).

     Raises:
         IOError/OSError: If any of the three files cannot be opened.
     """
     # All three files are opened before anything is read, and the with
     # statement guarantees the handles are closed afterwards.
     with open('Cs.txt','r') as cs_file, open('Is.txt','r') as is_file, open('It.txt','r') as it_file:
         self.CS_Fild = cs_file.read().lower()
         self.Is_filed = is_file.read().lower()
         self.IT_field = it_file.read().lower()
     self.Cs = tb(self.CS_Fild)
     self.Is = tb(self.Is_filed)
     self.It = tb(self.IT_field)
     self.bloblist = [self.Cs, self.Is, self.It]
开发者ID:hassanabdelhalim23,项目名称:Recommendation-System,代码行数:12,代码来源:anyalzedoc.py

示例6: extract

 def extract(text):
     """Return the 15 best-scoring words of *text* as one string.

     The corpus is built from the ``post_text`` column of ``clean_text.csv``.
     Every word of *text* is scored with ``tfidf`` against that corpus, and
     the 15 highest-scoring words are returned in descending score order,
     each followed by a single space.
     """
     with open("clean_text.csv") as csv_file:
         corpus = [tb(record['post_text']) for record in csv.DictReader(csv_file)]

     document = tb(text)
     weights = {}
     for term in document.words:
         weights[term] = tfidf(term, document, corpus)

     ranked = sorted(weights.items(), key=lambda pair: pair[1], reverse=True)
     return ''.join(term + ' ' for term, _ in ranked[:15])
开发者ID:AlexandrShestak,项目名称:nlp,代码行数:13,代码来源:MainWordsExtractor.py

示例7: analyzeNewBlog

def analyzeNewBlog(blog, goodBlogList, badBlogList, features):
    """Compare a new blog post with the analysed "good" and "bad" writings.

    Computes how densely the post uses name, religion, weaponry and
    government terms, scores it against every entry of *features*, prints the
    comparison and, when the two scores differ by at least 0.5, appends the
    post to *goodBlogList* or *badBlogList*.

    Args:
        blog: Text of the new post.
        goodBlogList: List of blobs of "good" posts; may be appended to.
        badBlogList: List of blobs of "bad" posts; may be appended to.
        features: Mapping with "good" and "bad" entries. Each entry holds a
            "words" sequence of (word, score) pairs and the densities
            "names", "religion", "weaponry" and "government"; any other key
            (such as "count") is ignored.
    """
    # Fetch every term list once instead of once per word.
    governmentTerms = terms.governmentTerms()
    weaponsTerms = terms.weaponsTerms()
    femaleNames = terms.femaleNames()
    maleNames = terms.maleNames()
    religiousTerms = terms.religiousTerms()
    stopWords = terms.stopWords()

    # Get word densities of the new blog. Iterating a blob directly walks its
    # characters, so the words have to be requested explicitly.
    namesCount = religionCount = weaponryCount = governmentCount = wordCount = 0
    for word in tb(blog).words:
        wordCount += 1
        if word in governmentTerms:
            governmentCount += 1
        if word in weaponsTerms:
            weaponryCount += 1
        if word in femaleNames or word in maleNames:
            namesCount += 1
        if word in religiousTerms:
            religionCount += 1
    # A post without words gets zero densities instead of a ZeroDivisionError.
    divisor = wordCount or 1
    analysisOutputs = AnalysisObject(namesCount/divisor, religionCount/divisor, weaponryCount/divisor, governmentCount/divisor, None)

    # Density of each term category in the new post, with its printed label.
    densities = {
        "religion": ("Religion", analysisOutputs.religionScore),
        "government": ("Government", analysisOutputs.governmentScore),
        "weaponry": ("Weaponry", analysisOutputs.weaponryScore),
        "names": ("Names", analysisOutputs.namesScore),
    }

    # Compare to the analyzed ones.
    scores = {"good": 0.0, "bad": 0.0}
    for upperKey in features:
        print ("\nComparing this blog to " + upperKey.upper() + " blogs:\n")
        for lowerKey in features[upperKey]:
            if lowerKey == "words":
                for word in features[upperKey][lowerKey]:
                    # "in blog" is a test against the raw post text.
                    if word[0] not in stopWords and word[0] in blog:
                        print ("Word found in " + upperKey + " blog: " + word[0])
                        scores[upperKey] += word[1] * 100 # If a word is found, update the score relative to its TFIDF score.
            elif lowerKey in densities:
                # Penalise the distance between the density of the analysed
                # blogs and the density of the new blog.
                label, newDensity = densities[lowerKey]
                variance = abs(features[upperKey][lowerKey] - newDensity)
                scores[upperKey] -= variance
                print (label + " variance: " + str(variance))
    print ("\nFinal Scores:\n" + "Bad: " + str(scores["bad"]) + "\nGood: " + str(scores["good"]) + "\n")
    if abs(scores["good"] - scores["bad"]) < .5:
        print ("This post does not trend towards 'good' or 'bad'.")
    elif scores["good"] > scores["bad"]:
        print ("This post has been marked as 'good'.")
        goodBlogList.append(tb(blog)) # Add the post to the blog list so a later analysis of that list includes it.
    else:
        print ("This post has been flagged as 'bad'.")
        badBlogList.append(tb(blog))
    print ("\n---------------------------------------")
开发者ID:dfrank8,项目名称:NaturalLanguageProcessor,代码行数:49,代码来源:NaturalLanguage.py

示例8: get_tfidf_values

    def get_tfidf_values(self, sentence):
        """Score *sentence* and return the accumulated ``[ds, nmd, tec]`` values.

        The sentence is turned into a blob and appended to ``self.bloblist``.
        Every adjacent word pair and every single word is scored with
        ``self.__tfidf__`` against a copy of that list. Each of the three
        totals is the sum, over the ranked single words and then the ranked
        pairs, of the matching ``__*_check__`` result times the score.
        """
        current = tb(sentence)
        self.bloblist.append(current)
        corpus = self.bloblist[:]

        tokens = current.words
        bigrams = [Word(tokens[idx] + ' ' + tokens[idx + 1]) for idx in range(len(tokens) - 1)]

        # Pairs are scored (and ranked) before the single words.
        pair_scores = {}
        for bigram in bigrams:
            pair_scores[bigram] = self.__tfidf__(bigram, current, corpus, 2)
        ranked_pairs = sorted(pair_scores.items(), key=lambda entry: entry[1], reverse=True)

        single_scores = {}
        for token in tokens:
            single_scores[token] = self.__tfidf__(token, current, corpus, 1)
        ranked_singles = sorted(single_scores.items(), key=lambda entry: entry[1], reverse=True)

        ds = nmd = tec = 0
        # Single words are accumulated first, then the pairs.
        for ranking in (ranked_singles, ranked_pairs):
            for term, weight in ranking:
                ds += self.__ds_check__(term) * weight
                nmd += self.__nmd_check__(term) * weight
                tec += self.__tec_check__(term) * weight

        return [ds, nmd, tec]
开发者ID:rfire01,项目名称:SAGLET,代码行数:26,代码来源:tfidfAnalyze.py

示例9: features_pos_tag

 def features_pos_tag(self):
   """Return the share of each part-of-speech tag as a tuple ordered like ``tag_list``.

   The title, short, need and essay texts are joined with '.' and tagged.
   Tags that do not occur keep the value they have in ``tag_dict``.
   """
   sections = [self.title, self.short, self.need, self.essay]
   tagged = tb('.'.join(sections)).tags
   tag_counts = Counter()
   for _, tag in tagged:
     tag_counts[tag] += 1
   n_tags = sum(tag_counts.values())
   ratios = tag_dict.copy()
   for tag, occurrences in tag_counts.items():
     ratios[tag] = float(occurrences)/n_tags
   return tuple(ratios[tag] for tag in tag_list)
开发者ID:hippozhu,项目名称:kdd2104,代码行数:7,代码来源:essay_feature.py

示例10: get_tweet_info

def get_tweet_info(tweet):
    """Flatten *tweet* into a plain dictionary and attach its sentiment.

    Returns a dict with the keys ``tweet_id``, ``created_by_id``,
    ``created_at``, ``text``, ``coordinates``, ``favorite_count``,
    ``retweet_count``, ``hashtags``, ``media`` and ``sentiment``.
    ``hashtags`` and ``media`` are ``None`` when ``tweet.entities`` lacks the
    key; ``sentiment`` holds the ``polarity`` and ``subjectivity`` of the text.
    """
    info = {}
    info['tweet_id'] = tweet.id_str
    info['created_by_id'] = tweet.user.id
    info['created_at'] = tweet.created_at
    info['text'] = tweet.text
    info['coordinates'] = tweet.coordinates
    # Note: only returns a non-zero favorite_count for an original tweet.
    # We'd need to look up the original tweet itself to get the
    # favorite_count, which is possible.
    info['favorite_count'] = tweet.favorite_count
    info['retweet_count'] = tweet.retweet_count
    # tweet.favorited is deliberately left out: it only tells us whether we,
    # the authenticated user, have favorited this tweet.

    for entity_key in ('hashtags', 'media'):
        info[entity_key] = tweet.entities[entity_key] if entity_key in tweet.entities else None

    # Get Sentiment
    sentiment = tb(tweet.text).sentiment
    info['sentiment'] = {
        'polarity': sentiment.polarity,
        'subjectivity': sentiment.subjectivity,
    }
    return info
开发者ID:zdellison,项目名称:cs194_project,代码行数:32,代码来源:views.py

示例11: stemming

def stemming(doc):
    """Tokenize *doc*, drop stop words, lemmatize the rest and return a blob.

    Tokens come from ``toker.tokenize``. Tokens found in ``cachedStopWords``
    are removed before each remaining token is passed through
    ``lemma.lemmatize``. The results are joined with single spaces.
    """
    kept = [token for token in toker.tokenize(doc) if token not in cachedStopWords]
    lemmas = [lemma.lemmatize(token) for token in kept]
    return tb(" ".join(lemmas))
开发者ID:goeastagent,项目名称:recdoc,代码行数:7,代码来源:nlp_test_wang_stemming.py

示例12: __init__

 def __init__(self,graph):
   """Collect a blob of the 'abstract' attribute of every node in *graph*.

   Nodes whose abstract cannot be turned into a blob are reported on stdout
   and skipped; the blobs of all other nodes end up in ``self.bloblist``.

   Args:
       graph: Object offering ``nodes()`` and a ``node`` mapping from node
           to an attribute dictionary.
   """
   self.bloblist = []
   for node in graph.nodes():
     try:
       abstract_blob = tb(graph.node[node]['abstract'])
     except (KeyError, TypeError):
       # KeyError: the node has no 'abstract' entry. TypeError: presumably
       # raised by tb for a non-text value such as None -- confirm.
       # Any other error is no longer swallowed and propagates.
       print("No abstract for node  %s" % (node,))
     else:
       self.bloblist.append(abstract_blob)
开发者ID:alextaylorjones,项目名称:NS202-Visualization-Of-Metro-Maps,代码行数:7,代码来源:concept_helper.py

示例13: buildTestData

 def buildTestData(self):
     """Build one blob per entry of ``self.dev`` and record how many there are.

     The strings in each entry's 'content' are joined with '. ', double
     periods are collapsed to one, and the blob is stored in
     ``self.testBloblist`` under the entry's key.
     ``self.testBloblistLength`` is set to the number of blobs.
     """
     self.testBloblist = {}
     for key, value in self.dev.iteritems():
         content = '. '.join(value['content'])
         # str.replace returns a new string, so the result must be assigned
         # back for the replacement to take effect.
         content = content.replace('..','.')
         self.testBloblist[key] = tb(content)
     self.testBloblistLength = len(self.testBloblist)
开发者ID:vswamy,项目名称:summarizer,代码行数:7,代码来源:tf_idf_stemmer.py

示例14: extract

def extract(storyString):
    """Return the ``analyze_sent_semantics`` result of every sentence in *storyString*.

    The text is split into sentences via ``tb(...).sentences``; the results
    keep the sentence order.
    """
    return [analyze_sent_semantics(sentence) for sentence in tb(storyString).sentences]
开发者ID:gmittal,项目名称:aar-nlp-research-2016,代码行数:7,代码来源:text_parse.py

示例15: main

def main():
    """Print the TF-IDF score of every non-stop-word found in ``documents/``.

    Text is collected from the ``.docx`` files (one entry per paragraph) and
    the ``.html``/``.htm`` files (title plus extracted article text) in the
    ``documents`` directory. PDF files are recognised but not read. All text
    is merged into one blob, so the corpus handed to ``tfidf`` contains a
    single document.
    """
    dataList = []

    for f in os.listdir('documents'):
        # os.path.join keeps the path valid on every platform, unlike a
        # hard-coded backslash.
        filePath = os.path.join('documents', f)
        fileExtension = os.path.splitext(filePath)[1].lower()
        if fileExtension == '.docx':
            print('')  # one blank line per handled file
            doc = docxDocument(filePath)
            dataList.extend(p.text for p in doc.paragraphs)
        elif fileExtension == '.pdf':
            print('')
            # TODO: PDF extraction is not implemented; PDF files add no text.
        elif fileExtension in ('.html', '.htm'):
            print('')
            with codecs.open(filePath, errors='ignore') as myfile:
                source = myfile.read()
                article = Document(source).summary()
                title = Document(source).title()
                soup = BeautifulSoup(article, 'lxml')
                final = replaceTwoOrMore((title.replace('\n', ' ').replace('\r', '') + '.' + soup.text.replace('\n', ' ').replace('\r', '')))
                dataList.append(final)
        else:
            # Undetected document type: two blank lines are printed.
            print('')
            print('')

    # Load the stop words once; a set gives constant-time membership tests.
    cachedStopWords = set(stopwords.words("english"))
    combined = ' '.join(dataList)

    bloblist = [tb(combined)]

    for i, blob in enumerate(bloblist):
        print("Top words in document {}".format(i + 1))
        scores = {word: tfidf(word, blob, bloblist) for word in blob.words if word not in cachedStopWords}
        sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        for word, score in sorted_words:
            print("\tWord: {}, TF-IDF: {}".format(word, round(score, 5)))
开发者ID:adamstein,项目名称:mayhem,代码行数:59,代码来源:run.py


注:本文中的textblob.tb函数示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。