当前位置: 首页>>代码示例>>Python>>正文


Python sessioninfomanager.updateSessionInfo函数代码示例

本文整理汇总了Python中utils.sessioninfomanager.updateSessionInfo函数的典型用法代码示例。如果您正苦于以下问题:Python updateSessionInfo函数的具体用法?Python updateSessionInfo怎么用?Python updateSessionInfo使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了updateSessionInfo函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, reads the thread
     limit from the 'anandtechforums_maxthreads' key of the 'Connector'
     config section, loads the current uri and then keeps calling
     __getThreads().  While it returns a true value the next listing page
     is loaded; paging stops as soon as the next page cannot be located
     or loaded.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__total_threads_count = 0
     self.__last_timestamp = datetime(1980, 1, 1)
     # Upper bound on the number of threads to process, because not all
     # forums get updated every day.
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='anandtechforums_maxthreads'))
     self.__setSoupForCurrentUri()
     while self.__getThreads():
         try:
             next_page_uri = self.soup.find(
                 'a', text='>', rel='Next').parent['href']
             query = dict(parse_qsl(next_page_uri.split('?')[-1]))
             # Drop the 's' query parameter before rebuilding the uri
             # (presumably a session id -- TODO confirm).
             query.pop('s', None)
             self.currenturi = (self.__baseuri + 'forumdisplay.php?' +
                                urlencode(query))
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.exception(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     log.info(self.log_msg('# of Tasks Added is %d' % len(self.linksOut)))
     if self.linksOut:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:31,代码来源:anandtechforumsconnector.py

示例2: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, loads the current
     uri, reads the thread limit from the 'htcpedia_maxthreads' key of the
     'Connector' config section and then keeps calling __getThreads().
     While it returns a true value, the link marked rel='next' is followed
     (after passing it through __removeSessionId()).

     If self.__links_to_process is truthy afterwards, updateSessionInfo()
     is called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__total_threads_count = 0
     self.__last_timestamp = datetime(1980, 1, 1)
     self.__setSoupForCurrentUri()
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='htcpedia_maxthreads'))

     while self.__getThreads():
         try:
             next_href = self.soup.find('a', rel='next')['href']
             self.currenturi = self.__removeSessionId(
                 'http://htcpedia.com/forum/' + next_href)
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.info(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     if self.__links_to_process:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     log.info(self.log_msg('# of tasks added is %d' % len(self.linksOut)))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:25,代码来源:htcpediaconnector.py

示例3: __createTasksForThreads

    def __createTasksForThreads(self):
        """
        Page through the forum listing and collect tasks for its threads.

        Resets the thread counter and the last timestamp, reads the thread
        limit from the 'ivillage_maxthreads' key of the 'Connector' config
        section and then keeps calling __getThreads().  While it returns a
        true value, the 'Next' link is followed and the page reloaded.

        If self.linksOut is non-empty afterwards, updateSessionInfo() is
        called with self.__last_timestamp.

        Returns:
            True, always.
        """
        self.__current_thread_count = 0
        self.__last_timestamp = datetime(1980, 1, 1)
        self.__max_threads_count = int(tg.config.get(
            path='Connector', key='ivillage_maxthreads'))
        while self.__getThreads():
            try:
                link_next = self.soup.find(
                    'a', href=True, text='Next').parent['href']
                self.currenturi = link_next
                self.__setSoupForCurrentUri()
            except Exception:
                # `Exception` rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed here.
                log.exception(self.log_msg(
                    'Next Page link not found for url %s' % self.currenturi))
                break

        log.info('Total # of tasks found is %d' % len(self.linksOut))
        if self.linksOut:
            updateSessionInfo('Search', self.session_info_out,
                              self.__last_timestamp, None, 'ForumThreadsPage',
                              self.task.instance_data.get('update'))
        return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:25,代码来源:ivillageconnector.py

示例4: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, reads the thread
     limit from the 'talkandroid_maxthreads' key of the 'Connector' config
     section, loads the current uri and then loops: __getThreads() is
     called for the loaded page and, if it returns a true value, the '>'
     link is followed.  Any exception inside the loop ends the paging.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True on completion, False if an exception escaped the setup or
         the session-info update.
     """
     try:
         self.__total_threads_count = 0
         self.__last_timestamp = datetime(1980, 1, 1)
         # Upper bound on the number of threads to process, because not
         # all forums get updated every day.
         self.__max_threads_count = int(tg.config.get(
             path='Connector', key='talkandroid_maxthreads'))
         self.__setSoupForCurrentUri()
         while True:
             try:
                 if not self.__getThreads():
                     break
                 self.currenturi = self.soup.find(
                     'a', text='>').parent['href']
                 self.__setSoupForCurrentUri()
             except Exception:
                 # `Exception` rather than a bare except, so that
                 # KeyboardInterrupt / SystemExit are not swallowed here.
                 log.info(self.log_msg(
                     'Next Page link not found for url %s'
                     % self.currenturi))
                 break
         if self.linksOut:
             updateSessionInfo('Search', self.session_info_out,
                               self.__last_timestamp, None,
                               'ForumThreadsPage',
                               self.task.instance_data.get('update'))
         return True
     except Exception:
         log.exception(self.log_msg(
             'Exception while creating tasks for the url %s'
             % self.currenturi))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:32,代码来源:talkandroidconnector.py

示例5: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, reads the thread
     limit from the 'mdjunction_maxthreads' key of the 'Connector' config
     section and then keeps calling __getThreads().  While it returns a
     true value, the link whose text is the current page number plus one
     is followed.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__current_thread_count = 0
     self.__last_timestamp = datetime(1980, 1, 1)
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='mdjunction_maxthreads'))
     while self.__getThreads():
         try:
             # The current page is marked by text of the form '[3]'.
             current_page_tag = self.soup.find(
                 'strong', text=re.compile(r'^\[\d+\]$'))
             # Strip the surrounding brackets to get the page number.
             next_page_no = str(int(current_page_tag[1:-1]) + 1)
             self.currenturi = current_page_tag.findParent('td').find(
                 'a', text=next_page_no).parent['href']
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.exception(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     log.info('Total # of tasks found is %d' % len(self.linksOut))
     if self.linksOut:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:26,代码来源:mdjunctionconnector.py

示例6: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Loads the current uri, resets the thread counter, base uri and last
     timestamp, reads the thread limit from the 'baliforum_maxthreads'
     key of the 'Connector' config section and then keeps calling
     __processForumUrl().  While it returns a true value, the link that
     wraps the 'Next page' image is followed.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__setSoupForCurrentUri()
     self.__total_threads_count = 0
     self.__baseuri = 'http://baliforum.com'
     self.__last_timestamp = datetime(1980, 1, 1)
     # Upper bound on the number of threads to process, because not all
     # forums get updated every day.
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='baliforum_maxthreads'))
     while self.__processForumUrl():
         try:
             next_image = self.soup.find('img', alt='Next page')
             self.currenturi = next_image.findParent('a')['href']
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.info(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     log.debug(self.log_msg('LINKSOUT: ' + str(len(self.linksOut))))
     if self.linksOut:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:28,代码来源:baliforumconnector.py

示例7: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, reads the thread
     limit from the 'fatwallet_maxthreads' key of the 'Connector' config
     section and then keeps calling __getThreads().  While it returns a
     true value, the next page uri is rebuilt from the form that holds
     the 'Next 20' button: the form action plus its hidden inputs encoded
     as a query string.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__current_thread_count = 0
     self.__last_timestamp = datetime(1980, 1, 1)
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='fatwallet_maxthreads'))
     while self.__getThreads():
         try:
             next_tag = self.soup.find('input', value='Next 20')
             form_tag = next_tag.findParent('form')
             # (name, value) pairs of every hidden input of the form.
             headers = [
                 (input_value['name'], input_value['value'])
                 for input_value in form_tag.findAll('input', type='hidden')
             ]
             self.currenturi = ('http://www.fatwallet.com' +
                                form_tag['action'] + '?' +
                                urlencode(headers))
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.exception(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     log.info('Total # of tasks found is %d' % len(self.linksOut))
     if self.linksOut:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:30,代码来源:fatwalletconnector.py

示例8: fetch

 def fetch(self):
     """
     Dispatch on the task uri: crawl a thread listing or a single thread.

     * uri ending in '/threads' (optionally followed by '/'): resets the
       paging state, reads the result limit from the
       'microsoft_numresults' key of the 'Connector' config section and
       calls __getPageData() until self.count reaches the limit or
       self.done is set.  If self.linksOut is non-empty afterwards,
       updateSessionInfo() is called with self.last_timestamp.
     * uri containing '/thread/': calls __getThread(), __getQuestion()
       and __getAnswers().
     * anything else: logs the uri as unassociated.

     Returns:
         True for the two handled uri shapes, False for an unrecognised
         uri or when an exception is raised.
     """
     try:
         uri = self.task.instance_data['uri']
         if re.match(r".*\/threads[\/]?$", uri):
             self.last_timestamp = datetime(1, 1, 1)
             # The forum name is the path segment just before '/threads'.
             self.forum_name = re.findall(
                 r'\/([^\/]+)\/threads\/?$', urlparse(uri)[2])[0]
             self.crawl_count = int(tg.config.get(
                 path='Connector', key='microsoft_numresults'))
             self.count = 0
             self.done = False
             self.currenturi = uri + '?sort=firstpostdesc'
             while self.count < self.crawl_count and not self.done:
                 self.__getPageData()
             log.debug(self.log_msg(
                 "Length of linksout is %d" % (len(self.linksOut))))
             if self.linksOut:
                 # NOTE(review): the key is lower-case 'search' here;
                 # confirm updateSessionInfo treats it like 'Search'.
                 updateSessionInfo('search', self.session_info_out,
                                   self.last_timestamp, None,
                                   'ForumThreadsPage',
                                   self.task.instance_data.get('update'))
             return True
         elif re.match(r".*\/thread\/.*?$", uri):
             self.__getThread()
             self.__getQuestion()
             self.__getAnswers()
             return True
         else:
             # No exception is active in this branch, so log.exception
             # would attach an empty traceback; log a plain message.
             log.info(self.log_msg("Unassociated url %s" % (uri)))
             return False
     except Exception:
         # `Exception` rather than a bare except, so that
         # KeyboardInterrupt / SystemExit are not swallowed here.
         log.exception(self.log_msg("Exception occured in fetch()"))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:29,代码来源:microsoftsocialconnector.py

示例9: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter, base uri and last timestamp, reads the
     thread limit from the 'seagateforums_maxthreads' key of the
     'Connector' config section, loads the current uri and then keeps
     calling __getThreads().  While it returns a true value, the 'Next'
     link is followed, using only the part of its href before the first
     ';'.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__total_threads_count = 0
     self.__baseuri = 'http://forums.seagate.com'
     self.__last_timestamp = datetime(1980, 1, 1)
     # Upper bound on the number of threads to process, because not all
     # forums get updated every day.
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='seagateforums_maxthreads'))
     self.__setSoupForCurrentUri()
     while self.__getThreads():
         try:
             next_href = self.soup.find(
                 'a', text='Next').findParent('a')['href']
             self.currenturi = self.__baseuri + next_href.split(';')[0]
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.info(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     if self.linksOut:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:28,代码来源:seagateforumsconnector.py

示例10: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, loads the current
     uri, reads the thread limit from the 'htchd2forum_maxthreads' key of
     the 'Connector' config section and then keeps calling __getThreads().
     While it returns a true value, the 'navPages' link whose text equals
     the next page number is followed (after passing its href through
     __removeSessionId()).

     If self.__links_to_process is truthy afterwards, updateSessionInfo()
     is called with self.__last_timestamp.

     Returns:
         True, always.
     """
     self.__total_threads_count = 0
     self.__last_timestamp = datetime(1980, 1, 1)
     self.__setSoupForCurrentUri()
     self.__max_threads_count = int(tg.config.get(
         path='Connector', key='htchd2forum_maxthreads'))
     current_page_no = 1
     while self.__getThreads():
         try:
             current_page_no += 1
             # int() raises for non-numeric link text and [0] raises when
             # no link matches; either one ends the paging below.
             page_links = [
                 x for x in self.soup.findAll('a', 'navPages')
                 if int(stripHtml(x.renderContents())) == current_page_no
             ]
             self.currenturi = self.__removeSessionId(
                 page_links[0]['href'])
             self.__setSoupForCurrentUri()
         except Exception:
             # `Exception` rather than a bare except, so that
             # KeyboardInterrupt / SystemExit are not swallowed here.
             log.info(self.log_msg(
                 'Next Page link not found for url %s' % self.currenturi))
             break
     if self.__links_to_process:
         updateSessionInfo('Search', self.session_info_out,
                           self.__last_timestamp, None, 'ForumThreadsPage',
                           self.task.instance_data.get('update'))
     log.info(self.log_msg('# of tasks added is %d' % len(self.linksOut)))
     return True
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:26,代码来源:htchd2forumconnector.py

示例11: __createTasksForThreads

 def __createTasksForThreads(self):
     """
     Page through the forum listing and collect tasks for its threads.

     Resets the thread counter and the last timestamp, reads the thread
     limit from the 'iphoneforums_maxthreads' key of the 'Connector'
     config section, loads the current uri and then keeps calling
     __processForumUrl().  While it returns a true value, the link whose
     title matches 'Next Page - ' is followed.

     If self.linksOut is non-empty afterwards, updateSessionInfo() is
     called with self.__last_timestamp.

     Returns:
         True on completion, False if an exception escaped the paging
         loop or its setup.
     """
     try:
         self.__total_threads_count = 0
         self.__last_timestamp = datetime(1980, 1, 1)
         self.__max_threads_count = int(tg.config.get(
             path='Connector', key='iphoneforums_maxthreads'))
         self.__setSoupForCurrentUri()
         while self.__processForumUrl():
             try:
                 self.currenturi = self.soup.find(
                     'a', title=re.compile('Next Page - '))['href']
                 self.__setSoupForCurrentUri()
             except Exception:
                 # `Exception` rather than a bare except, so that
                 # KeyboardInterrupt / SystemExit are not swallowed here.
                 log.exception(self.log_msg(
                     'Next Page link not found for url %s'
                     % self.currenturi))
                 break

         log.info(self.log_msg('LINKSOUT: ' + str(len(self.linksOut))))
         if self.linksOut:
             updateSessionInfo('Search', self.session_info_out,
                               self.__last_timestamp, None,
                               'ForumThreadsPage',
                               self.task.instance_data.get('update'))
         return True
     except Exception:
         # log.exception keeps the traceback of the unexpected failure,
         # which a plain log.info would drop.
         log.exception(self.log_msg(
             'Exception while creating tasks for the url %s'
             % self.currenturi))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:33,代码来源:iphoneforumsconnector.py

示例12: fetch

    def fetch(self):
        """
        Fetch of egg head cafe.

        Sets the genre to "Review", resets the counters and reads the
        limit from the 'eggheadcafe_max_threads_to_process' key of the
        'Connector' config section, then dispatches on the current uri:

        * uri without 'forumtree.aspx': loads the page, calls
          __getParentPage() and then __addPosts() once per page, following
          the 'Next' link until it is missing or the page cannot be loaded.
        * uri with 'forumtree.aspx': loads the page and calls
          __getThreadPage() once per page, following the 'Next' link while
          it returns a true value.  If self.linksOut is non-empty
          afterwards, updateSessionInfo() is called with
          self.last_timestamp.

        Returns:
            True when a branch ran to completion, False when the first
            page could not be loaded or an exception escaped.
        """
        self.genre = "Review"
        try:
            self.base_url = 'http://www.eggheadcafe.com'
            self.parent_uri = self.currenturi
            self.total_posts_count = 0
            self.last_timestamp = datetime(1980, 1, 1)
            self.max_posts_count = int(tg.config.get(
                path='Connector', key='eggheadcafe_max_threads_to_process'))
            if 'forumtree.aspx' not in self.currenturi:
                if not self.__setSoup():
                    log.info(self.log_msg(
                        'Soup not set , Returning False from Fetch'))
                    return False
                self.__getParentPage()
                while True:
                    # The next link is read from a copy taken before
                    # __addPosts() runs (presumably because __addPosts()
                    # may replace self.soup -- TODO confirm).
                    parent_soup = copy.copy(self.soup)
                    self.__addPosts()
                    try:
                        self.currenturi = self.base_url + parent_soup.find(
                            'a', text='Next').parent['href']
                        if not self.__setSoup():
                            break
                    except Exception:
                        # `Exception` rather than a bare except, so that
                        # KeyboardInterrupt / SystemExit propagate.
                        log.info(self.log_msg('Next Page link not found'))
                        break
                return True
            else:
                if not self.__setSoup():
                    log.info(self.log_msg(
                        'Soup not set , Returning False from Fetch'))
                    return False
                while True:
                    try:
                        if not self.__getThreadPage():
                            break
                        self.currenturi = self.base_url + self.soup.find(
                            'a', text='Next').parent['href']
                        if not self.__setSoup():
                            break
                    except Exception:
                        log.info(self.log_msg('Next Page link not found'))
                        break
                if self.linksOut:
                    updateSessionInfo('Search', self.session_info_out,
                                      self.last_timestamp, None,
                                      'ForumThreadsPage',
                                      self.task.instance_data.get('update'))
                return True
        except Exception:
            log.exception(self.log_msg('Exception in fetch'))
            return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:57,代码来源:eggheadcafeconnector.py

示例13: fetch

 def fetch(self):
     """
     Fetch of http://forums.devx.com

     Sets the genre to "Review", normalises the current uri with
     __getStandUri() and dispatches on the result:

     * 'showthread.' uri: loads the page, calls __getParentPage() and then
       __addPosts() once per page, following the '&gt;' link until it is
       missing or the page cannot be loaded.
     * 'forumdisplay' uri: resets the counters, reads the limit from the
       'devxforum_numresults' key of the 'Connector' config section,
       appends the sort parameters to the uri and calls __getThreads()
       once per page while it returns a true value.  If self.linksOut is
       non-empty afterwards, updateSessionInfo() is called with
       self.last_timestamp.
     * anything else: logs that the url format is not recognised.

     Returns:
         True when a branch ran to completion, False when the first page
         could not be loaded, the uri is not recognised or an exception
         escaped.
     """
     self.genre = "Review"
     try:
         self.parent_uri = self.currenturi
         log.info(self.parent_uri)
         self.currenturi = self.__getStandUri(self.parent_uri)
         log.info(self.log_msg('The Standard Uri is'))
         # Log the normalised uri, not the original one again.
         log.info(self.currenturi)
         if self.currenturi.startswith('http://forums.devx.com/showthread.'):
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             self.__getParentPage()
             self.post_type = True
             while True:
                 self.__addPosts()
                 try:
                     self.currenturi = self.__getStandUri(
                         'http://forums.devx.com/' +
                         self.soup.find('a', text='&gt;').parent['href'])
                 except Exception:
                     # `Exception` rather than a bare except, so that
                     # KeyboardInterrupt / SystemExit propagate.
                     log.info(self.log_msg('Next page not set'))
                     break
                 if not self.__setSoup():
                     log.info(self.log_msg('cannot continue'))
                     break
             return True
         elif self.currenturi.startswith(
                 'http://forums.devx.com/forumdisplay'):
             self.total_posts_count = 0
             self.last_timestamp = datetime(1980, 1, 1)
             self.max_posts_count = int(tg.config.get(
                 path='Connector', key='devxforum_numresults'))
             self.currenturi = (self.currenturi +
                                '&daysprune=-1&order=desc&sort=lastpost')
             log.info(self.log_msg('The link is:'))
             log.info(self.currenturi)
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             while True:
                 if not self.__getThreads():
                     break
                 try:
                     self.currenturi = self.__getStandUri(
                         'http://forums.devx.com/' +
                         self.soup.find('a', text='&gt;').parent['href'])
                     if not self.__setSoup():
                         break
                 except Exception:
                     log.info(self.log_msg('Next Page link not found'))
                     break
             if self.linksOut:
                 updateSessionInfo('Search', self.session_info_out,
                                   self.last_timestamp, None,
                                   'ForumThreadsPage',
                                   self.task.instance_data.get('update'))
             return True
         else:
             log.info(self.log_msg(
                 'Url format is not recognized, Please verify the url'))
             # Return an explicit False instead of falling off the end
             # (None), matching every other failure path of this method.
             return False
     except Exception:
         log.exception(self.log_msg('Exception in fetch'))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:57,代码来源:devxconnector.py

示例14: fetch

 def fetch(self):
     """
     Fetch of the mynextcollege.com college-reviews forum.
     sample uri :  http://www.mynextcollege.com/college-reviews/discussion-room-f6.html

     Sets the genre to "Review", strips everything from '-sid=' onwards
     off the current uri and dispatches on the result:

     * the forum root uri: loads the page and calls __addFortumLinks().
     * uri ending in '-f<digits>.html': resets the counters, reads the
       limit from the 'mynextcollege_numresults' key of the 'Connector'
       config section and calls __getThreads() once per page while it
       returns a true value.  If self.linksOut is non-empty afterwards,
       updateSessionInfo() is called with self.last_timestamp.
     * any other uri: loads the page, calls __getParentPage() and then
       __addPosts() once per page, following the 'Next' link.

     Returns:
         True when a branch ran to completion, False when a page could
         not be loaded initially or an exception escaped.
     """
     self.genre = "Review"
     try:
         self.parent_uri = self.currenturi
         self.currenturi = self.currenturi.split('-sid=')[0]
         if self.currenturi == 'http://www.mynextcollege.com/college-reviews/':
             try:
                 if not self.__setSoup():
                     return False
                 self.__addFortumLinks()
             except Exception:
                 # `Exception` rather than a bare except, so that
                 # KeyboardInterrupt / SystemExit propagate.
                 log.info(self.log_msg('cannot add tasks'))
                 return False
             # NOTE(review): there is no return here, so the root uri
             # falls through to the thread-page branch below -- confirm
             # that this is intended.
         if re.match(r'.*?\-f\d+\.html$', self.currenturi):
             self.total_posts_count = 0
             self.last_timestamp = datetime(1980, 1, 1)
             self.max_posts_count = int(tg.config.get(
                 path='Connector', key='mynextcollege_numresults'))
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             while True:
                 if not self.__getThreads():
                     break
                 try:
                     # Drop the first character of the href and anything
                     # from '-sid=' onwards before joining with the base.
                     next_href = self.soup.find(
                         'a', text='Next').parent['href']
                     self.currenturi = (
                         'http://www.mynextcollege.com/college-reviews' +
                         next_href[1:].split('-sid=')[0])
                     if not self.__setSoup():
                         break
                 except Exception:
                     log.info(self.log_msg('Next Page link not found'))
                     break
             if self.linksOut:
                 updateSessionInfo('Search', self.session_info_out,
                                   self.last_timestamp, None,
                                   'ForumThreadsPage',
                                   self.task.instance_data.get('update'))
             return True
         else:
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             self.__getParentPage()
             self.post_type = True
             while True:
                 self.__addPosts()
                 try:
                     next_href = self.soup.find(
                         'a', text='Next').parent['href']
                     self.currenturi = (
                         'http://www.mynextcollege.com/college-reviews' +
                         next_href[1:].split('-sid=')[0])
                     if not self.__setSoup():
                         break
                 except Exception:
                     log.info(self.log_msg('Next page not set'))
                     break
             return True
     except Exception:
         log.exception(self.log_msg('Exception in fetch'))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:56,代码来源:mynextcollegeconnector.py

示例15: fetch

 def fetch(self):
     """
     Fetch of forum page (ocenbank.pl forum).

     Sets the genre to "Review" and dispatches on the current uri:

     * 'viewforum' uri: resets the counters, reads the limit from the
       'ocean_forum_numresults' key of the 'Connector' config section and
       calls __getThreads() once per page while it returns a true value.
       If self.linksOut is non-empty afterwards, updateSessionInfo() is
       called with self.last_timestamp.
     * 'viewtopic' uri: loads the page, calls __getParentPage() and then
       __addPosts() once per page.
     * anything else: logs the uri as wrong.

     In both paging loops the next page is the link inside the
     'pagelink conl' paragraph whose text is the next page number,
     starting at 2.

     Returns:
         True when a branch ran to completion, False when the first page
         could not be loaded, the uri is not recognised or an exception
         escaped.
     """
     self.genre = "Review"
     try:
         self.parent_uri = self.currenturi
         self.base_url = 'http://ocenbank.pl/forum/'
         if self.currenturi.startswith('http://ocenbank.pl/forum/viewforum'):
             self.total_posts_count = 0
             self.last_timestamp = datetime(1980, 1, 1)
             self.max_posts_count = int(tg.config.get(
                 path='Connector', key='ocean_forum_numresults'))
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             next_page_no = 2
             while True:
                 if not self.__getThreads():
                     break
                 try:
                     page_links = self.soup.find('p', 'pagelink conl')
                     self.currenturi = self.base_url + page_links.find(
                         'a', text=str(next_page_no)).parent['href']
                     if not self.__setSoup():
                         break
                     next_page_no += 1
                 except Exception:
                     # `Exception` rather than a bare except, so that
                     # KeyboardInterrupt / SystemExit propagate.
                     log.info(self.log_msg('Next Page link not found'))
                     break
             if self.linksOut:
                 updateSessionInfo('Search', self.session_info_out,
                                   self.last_timestamp, None,
                                   'ForumThreadsPage',
                                   self.task.instance_data.get('update'))
             return True
         elif self.currenturi.startswith(
                 'http://ocenbank.pl/forum/viewtopic'):
             if not self.__setSoup():
                 log.info(self.log_msg(
                     'Soup not set , Returning False from Fetch'))
                 return False
             self.__getParentPage()
             self.post_type = True
             next_page_no = 2
             while True:
                 self.__addPosts()
                 try:
                     page_links = self.soup.find('p', 'pagelink conl')
                     self.currenturi = self.base_url + page_links.find(
                         'a', text=str(next_page_no)).parent['href']
                     if not self.__setSoup():
                         break
                     next_page_no += 1
                 except Exception:
                     log.info(self.log_msg('Next Page link not found'))
                     break
             return True
         else:
             log.info(self.log_msg('Wrong url is feeded'))
             log.info(self.log_msg('Hai+' + self.currenturi))
             return False
     except Exception:
         log.exception(self.log_msg('Exception in fetch'))
         return False
开发者ID:jsyadav,项目名称:CrawlerFramework,代码行数:55,代码来源:oceanconnector.py


注:本文中的utils.sessioninfomanager.updateSessionInfo函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。