本文整理汇总了Python中utils.sessioninfomanager.updateSessionInfo函数的典型用法代码示例。如果您正苦于以下问题:Python updateSessionInfo函数的具体用法?Python updateSessionInfo怎么用?Python updateSessionInfo使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了updateSessionInfo函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the thread-listing pages and collect thread tasks.

    Resets the per-run counters, reads the thread cap from the
    ``anandtechforums_maxthreads`` Connector config key, loads the soup
    for ``self.currenturi`` and then calls ``self.__getThreads()`` for as
    long as it returns a truthy value, following the '>' (rel="Next")
    anchor from page to page.  When ``self.linksOut`` is non-empty the
    session info is updated with ``self.__last_timestamp``.

    Returns:
        True, always; a missing next-page link only ends the paging loop.
    """
    self.__total_threads_count = 0
    # Floor value; presumably raised by __getThreads -- TODO confirm.
    self.__last_timestamp = datetime(1980, 1, 1)
    # Not every forum is updated every day, so the number of threads
    # handled per run is capped by configuration.
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='anandtechforums_maxthreads'))
    self.__setSoupForCurrentUri()
    while self.__getThreads():
        try:
            next_page_uri = self.soup.find(
                'a', text='>', rel='Next').parent['href']
            query = dict(parse_qsl(next_page_uri.split('?')[-1]))
            # Drop the 's' parameter (presumably a session id -- verify)
            # before rebuilding the uri.
            query.pop('s', None)
            self.currenturi = (self.__baseuri + 'forumdisplay.php?'
                               + urlencode(query))
            self.__setSoupForCurrentUri()
        except Exception:
            # Any failure to locate/load the next page ends pagination.
            log.exception(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    log.info(self.log_msg('# of Tasks Added is %d' % len(self.linksOut)))
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例2: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the htcpedia thread-listing pages and collect thread tasks.

    Resets the counters, loads the soup for ``self.currenturi``, reads the
    cap from the ``htcpedia_maxthreads`` Connector config key and calls
    ``self.__getThreads()`` while it returns a truthy value, following the
    ``rel="next"`` anchor between pages.  When ``self.__links_to_process``
    is truthy the session info is updated with ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__total_threads_count = 0
    self.__last_timestamp = datetime(1980, 1, 1)
    self.__setSoupForCurrentUri()
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='htcpedia_maxthreads'))
    while self.__getThreads():
        try:
            next_href = self.soup.find('a', rel='next')['href']
            self.currenturi = self.__removeSessionId(
                'http://htcpedia.com/forum/' + next_href)
            self.__setSoupForCurrentUri()
        except Exception:
            # No next link (or it could not be loaded): stop paging.
            log.info(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    if self.__links_to_process:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    log.info(self.log_msg('# of tasks added is %d' % len(self.linksOut)))
    return True
示例3: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the ivillage thread-listing pages and collect thread tasks.

    Resets the counters, reads the cap from the ``ivillage_maxthreads``
    Connector config key and calls ``self.__getThreads()`` while it
    returns a truthy value, following the 'Next' anchor between pages.
    When ``self.linksOut`` is non-empty the session info is updated with
    ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__current_thread_count = 0
    self.__last_timestamp = datetime(1980, 1, 1)
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='ivillage_maxthreads'))
    while self.__getThreads():
        try:
            # The href is used as-is, so it is presumably absolute -- verify.
            self.currenturi = self.soup.find(
                'a', href=True, text='Next').parent['href']
            self.__setSoupForCurrentUri()
        except Exception:
            log.exception(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    log.info('Total # of tasks found is %d' % len(self.linksOut))
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例4: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the talkandroid thread-listing pages and collect thread tasks.

    Resets the counters, reads the cap from the ``talkandroid_maxthreads``
    Connector config key, loads the soup for ``self.currenturi`` and calls
    ``self.__getThreads()`` page by page, following the '>' anchor.  When
    ``self.linksOut`` is non-empty the session info is updated with
    ``self.__last_timestamp``.

    Returns:
        True on normal completion (including when no next page exists),
        False if setup or the session-info update raises.
    """
    try:
        self.__total_threads_count = 0
        self.__last_timestamp = datetime(1980, 1, 1)
        # Not every forum is updated every day, so the number of threads
        # handled per run is capped by configuration.
        self.__max_threads_count = int(tg.config.get(
            path='Connector', key='talkandroid_maxthreads'))
        self.__setSoupForCurrentUri()
        while True:
            try:
                if not self.__getThreads():
                    break
                self.currenturi = self.soup.find(
                    'a', text='>').parent['href']
                self.__setSoupForCurrentUri()
            except Exception:
                # Errors from __getThreads are also reported under this
                # message, as in the original flow.
                log.info(self.log_msg(
                    'Next Page link not found for url %s'
                    % self.currenturi))
                break
        if self.linksOut:
            updateSessionInfo('Search', self.session_info_out,
                              self.__last_timestamp, None,
                              'ForumThreadsPage',
                              self.task.instance_data.get('update'))
        return True
    except Exception:
        log.exception(self.log_msg(
            'Exception while creating tasks for the url %s'
            % self.currenturi))
        return False
示例5: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the mdjunction thread-listing pages and collect thread tasks.

    Resets the counters, reads the cap from the ``mdjunction_maxthreads``
    Connector config key and calls ``self.__getThreads()`` while it
    returns a truthy value.  The next page is found by reading the current
    page marker (a ``<strong>`` whose text looks like ``[3]``) and
    following the sibling anchor labelled with the next number.  When
    ``self.linksOut`` is non-empty the session info is updated with
    ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__current_thread_count = 0
    self.__last_timestamp = datetime(1980, 1, 1)
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='mdjunction_maxthreads'))
    while self.__getThreads():
        try:
            current_page_tag = self.soup.find(
                'strong', text=re.compile(r'^\[\d+\]$'))
            # Strip the surrounding brackets to get the page number.
            next_page_label = str(int(current_page_tag[1:-1]) + 1)
            pager_cell = current_page_tag.findParent('td')
            self.currenturi = pager_cell.find(
                'a', text=next_page_label).parent['href']
            self.__setSoupForCurrentUri()
        except Exception:
            log.exception(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    log.info('Total # of tasks found is %d' % len(self.linksOut))
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例6: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the baliforum listing pages and collect thread tasks.

    Loads the soup for ``self.currenturi``, resets the counters, reads the
    cap from the ``baliforum_maxthreads`` Connector config key and calls
    ``self.__processForumUrl()`` while it returns a truthy value,
    following the anchor that wraps the 'Next page' image.  When
    ``self.linksOut`` is non-empty the session info is updated with
    ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__setSoupForCurrentUri()
    self.__total_threads_count = 0
    self.__baseuri = 'http://baliforum.com'
    self.__last_timestamp = datetime(1980, 1, 1)
    # Not every forum is updated every day, so the number of threads
    # handled per run is capped by configuration.
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='baliforum_maxthreads'))
    while self.__processForumUrl():
        try:
            next_image = self.soup.find('img', alt='Next page')
            self.currenturi = next_image.findParent('a')['href']
            self.__setSoupForCurrentUri()
        except Exception:
            log.info(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    log.debug(self.log_msg('LINKSOUT: ' + str(len(self.linksOut))))
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例7: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the fatwallet listing pages and collect thread tasks.

    Resets the counters, reads the cap from the ``fatwallet_maxthreads``
    Connector config key and calls ``self.__getThreads()`` while it
    returns a truthy value.  The next page uri is rebuilt from the form
    that contains the 'Next 20' button: its ``action`` plus the url-encoded
    hidden inputs.  When ``self.linksOut`` is non-empty the session info
    is updated with ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__current_thread_count = 0
    self.__last_timestamp = datetime(1980, 1, 1)
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='fatwallet_maxthreads'))
    while self.__getThreads():
        try:
            next_tag = self.soup.find('input', value='Next 20')
            form_tag = next_tag.findParent('form')
            # The hidden inputs become the query string of the next page.
            form_fields = [
                (hidden['name'], hidden['value'])
                for hidden in form_tag.findAll('input', type='hidden')
            ]
            self.currenturi = ('http://www.fatwallet.com'
                               + form_tag['action'] + '?'
                               + urlencode(form_fields))
            self.__setSoupForCurrentUri()
        except Exception:
            log.exception(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    log.info('Total # of tasks found is %d' % len(self.linksOut))
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例8: fetch
def fetch(self):
    """Dispatch on the task uri: a thread listing or a single thread.

    * uri ending in ``/threads`` (optional trailing slash): reads the
      result cap from the ``microsoft_numresults`` Connector config key,
      then calls ``self.__getPageData()`` until ``self.count`` reaches the
      cap or ``self.done`` is set, and updates the session info when
      ``self.linksOut`` is non-empty.
    * uri containing ``/thread/``: calls ``__getThread``, ``__getQuestion``
      and ``__getAnswers`` in that order.

    Returns:
        True when one of the two branches completes, False for an
        unrecognised uri or when any exception is raised.
    """
    try:
        uri = self.task.instance_data['uri']
        if re.match(r".*\/threads[\/]?$", uri):
            self.last_timestamp = datetime(1, 1, 1)
            # Forum name is the path segment just before '/threads'.
            self.forum_name = re.findall(
                r'\/([^\/]+)\/threads\/?$', urlparse(uri)[2])[0]
            self.crawl_count = int(tg.config.get(
                path='Connector', key='microsoft_numresults'))
            self.count = 0
            self.done = False
            self.currenturi = uri + '?sort=firstpostdesc'
            # count/done are presumably advanced by __getPageData -- verify.
            while self.count < self.crawl_count and not self.done:
                self.__getPageData()
            log.debug(self.log_msg(
                "Length of linksout is %d" % (len(self.linksOut))))
            if self.linksOut:
                updateSessionInfo('search', self.session_info_out,
                                  self.last_timestamp, None,
                                  'ForumThreadsPage',
                                  self.task.instance_data.get('update'))
            return True
        elif re.match(r".*\/thread\/.*?$", uri):
            self.__getThread()
            self.__getQuestion()
            self.__getAnswers()
            return True
        else:
            # No exception is active here, so log at error level without
            # a traceback instead of using log.exception.
            log.error(self.log_msg("Unassociated url %s" % (uri)))
            return False
    except Exception:
        log.exception(self.log_msg("Exception occured in fetch()"))
        return False
示例9: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the seagate forum listing pages and collect thread tasks.

    Resets the counters, reads the cap from the
    ``seagateforums_maxthreads`` Connector config key, loads the soup for
    ``self.currenturi`` and calls ``self.__getThreads()`` while it returns
    a truthy value, following the 'Next' anchor.  When ``self.linksOut``
    is non-empty the session info is updated with
    ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__total_threads_count = 0
    self.__baseuri = 'http://forums.seagate.com'
    self.__last_timestamp = datetime(1980, 1, 1)
    # Not every forum is updated every day, so the number of threads
    # handled per run is capped by configuration.
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='seagateforums_maxthreads'))
    self.__setSoupForCurrentUri()
    while self.__getThreads():
        try:
            next_href = self.soup.find(
                'a', text='Next').findParent('a')['href']
            # Keep only the part before ';' (presumably strips a session
            # suffix -- verify).
            self.currenturi = self.__baseuri + next_href.split(';')[0]
            self.__setSoupForCurrentUri()
        except Exception:
            log.info(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    if self.linksOut:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    return True
示例10: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the htchd2forum listing pages and collect thread tasks.

    Resets the counters, loads the soup for ``self.currenturi``, reads the
    cap from the ``htchd2forum_maxthreads`` Connector config key and calls
    ``self.__getThreads()`` while it returns a truthy value.  The next
    page is the first ``a.navPages`` anchor whose text equals the next
    page number.  When ``self.__links_to_process`` is truthy the session
    info is updated with ``self.__last_timestamp``.

    Returns:
        True, always.
    """
    self.__total_threads_count = 0
    self.__last_timestamp = datetime(1980, 1, 1)
    self.__setSoupForCurrentUri()
    self.__max_threads_count = int(tg.config.get(
        path='Connector', key='htchd2forum_maxthreads'))
    current_page_no = 1
    while self.__getThreads():
        try:
            current_page_no += 1
            # Every navPages anchor is parsed as an int; a non-numeric
            # label raises and ends paging, exactly as before.
            matching_links = [
                anchor for anchor in self.soup.findAll('a', 'navPages')
                if int(stripHtml(anchor.renderContents())) == current_page_no
            ]
            self.currenturi = self.__removeSessionId(
                matching_links[0]['href'])
            self.__setSoupForCurrentUri()
        except Exception:
            log.info(self.log_msg(
                'Next Page link not found for url %s' % self.currenturi))
            break
    if self.__links_to_process:
        updateSessionInfo('Search', self.session_info_out,
                          self.__last_timestamp, None, 'ForumThreadsPage',
                          self.task.instance_data.get('update'))
    log.info(self.log_msg('# of tasks added is %d' % len(self.linksOut)))
    return True
示例11: __createTasksForThreads
def __createTasksForThreads(self):
    """Walk the iphoneforums listing pages and collect thread tasks.

    Resets the counters, reads the cap from the
    ``iphoneforums_maxthreads`` Connector config key, loads the soup for
    ``self.currenturi`` and calls ``self.__processForumUrl()`` while it
    returns a truthy value, following the anchor whose title matches
    'Next Page - '.  When ``self.linksOut`` is non-empty the session info
    is updated with ``self.__last_timestamp``.

    Returns:
        True on normal completion, False if anything outside the
        next-page lookup raises.
    """
    try:
        self.__total_threads_count = 0
        self.__last_timestamp = datetime(1980, 1, 1)
        self.__max_threads_count = int(tg.config.get(
            path='Connector', key='iphoneforums_maxthreads'))
        self.__setSoupForCurrentUri()
        while self.__processForumUrl():
            try:
                self.currenturi = self.soup.find(
                    'a', title=re.compile('Next Page - '))['href']
                self.__setSoupForCurrentUri()
            except Exception:
                log.exception(self.log_msg(
                    'Next Page link not found for url %s'
                    % self.currenturi))
                break
        log.info(self.log_msg('LINKSOUT: ' + str(len(self.linksOut))))
        if self.linksOut:
            updateSessionInfo('Search', self.session_info_out,
                              self.__last_timestamp, None,
                              'ForumThreadsPage',
                              self.task.instance_data.get('update'))
        return True
    except Exception:
        # log.exception keeps the traceback that log.info discarded.
        log.exception(self.log_msg(
            'Exception while creating tasks for the url %s'
            % self.currenturi))
        return False
示例12: fetch
def fetch(self):
    """Fetch an eggheadcafe thread page or forum-tree listing.

    * uri without 'forumtree.aspx': loads the soup, calls
      ``__getParentPage`` once and then ``__addPosts`` per page, following
      the 'Next' anchor.
    * uri with 'forumtree.aspx': loads the soup and calls
      ``__getThreadPage`` per page, following the 'Next' anchor, then
      updates the session info when ``self.linksOut`` is non-empty.

    Returns:
        True when a branch completes, False when the initial soup cannot
        be set or an unexpected exception is raised.
    """
    self.genre = "Review"
    try:
        self.base_url = 'http://www.eggheadcafe.com'
        self.parent_uri = self.currenturi
        self.total_posts_count = 0
        self.last_timestamp = datetime(1980, 1, 1)
        self.max_posts_count = int(tg.config.get(
            path='Connector', key='eggheadcafe_max_threads_to_process'))
        if 'forumtree.aspx' not in self.currenturi:
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            self.__getParentPage()
            while True:
                # Snapshot taken before __addPosts, which presumably may
                # alter self.soup -- TODO confirm.
                parent_soup = copy.copy(self.soup)
                self.__addPosts()
                try:
                    self.currenturi = self.base_url + parent_soup.find(
                        'a', text='Next').parent['href']
                    if not self.__setSoup():
                        break
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            return True
        else:
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            while True:
                try:
                    if not self.__getThreadPage():
                        break
                    self.currenturi = self.base_url + self.soup.find(
                        'a', text='Next').parent['href']
                    if not self.__setSoup():
                        break
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            if self.linksOut:
                updateSessionInfo('Search', self.session_info_out,
                                  self.last_timestamp, None,
                                  'ForumThreadsPage',
                                  self.task.instance_data.get('update'))
            return True
    except Exception:
        log.exception(self.log_msg('Exception in fetch'))
        return False
示例13: fetch
def fetch(self):
    """Fetch a forums.devx.com thread or forum listing.

    The uri is first normalised with ``self.__getStandUri``.

    * ``showthread.`` uri: loads the soup, calls ``__getParentPage`` once
      and then ``__addPosts`` per page, following the '>' anchor.
    * ``forumdisplay`` uri: reads the cap from the ``devxforum_numresults``
      Connector config key, appends the sort/prune query parameters, calls
      ``__getThreads`` per page and updates the session info when
      ``self.linksOut`` is non-empty.

    Returns:
        True when a branch completes; False when the soup cannot be set,
        the uri format is not recognised, or an exception is raised.
    """
    self.genre = "Review"
    try:
        self.parent_uri = self.currenturi
        log.info(self.parent_uri)
        self.currenturi = self.__getStandUri(self.parent_uri)
        log.info(self.log_msg('The Standard Uri is'))
        # Log the standardised uri; the original uri was already logged.
        log.info(self.currenturi)
        if self.currenturi.startswith('http://forums.devx.com/showthread.'):
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            self.__getParentPage()
            self.post_type = True
            while True:
                self.__addPosts()
                try:
                    self.currenturi = self.__getStandUri(
                        'http://forums.devx.com/'
                        + self.soup.find('a', text='>').parent['href'])
                except Exception:
                    log.info(self.log_msg('Next page not set'))
                    break
                if not self.__setSoup():
                    log.info(self.log_msg('cannot continue'))
                    break
            return True
        elif self.currenturi.startswith(
                'http://forums.devx.com/forumdisplay'):
            self.total_posts_count = 0
            self.last_timestamp = datetime(1980, 1, 1)
            self.max_posts_count = int(tg.config.get(
                path='Connector', key='devxforum_numresults'))
            self.currenturi = (self.currenturi
                               + '&daysprune=-1&order=desc&sort=lastpost')
            log.info(self.log_msg('The link is:'))
            log.info(self.currenturi)
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            while True:
                if not self.__getThreads():
                    break
                try:
                    self.currenturi = self.__getStandUri(
                        'http://forums.devx.com/'
                        + self.soup.find('a', text='>').parent['href'])
                    if not self.__setSoup():
                        break
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            if self.linksOut:
                updateSessionInfo('Search', self.session_info_out,
                                  self.last_timestamp, None,
                                  'ForumThreadsPage',
                                  self.task.instance_data.get('update'))
            return True
        else:
            log.info(self.log_msg(
                'Url format is not recognized, Please verify the url'))
            # Explicit falsy result for an unrecognised uri.
            return False
    except Exception:
        log.exception(self.log_msg('Exception in fetch'))
        return False
示例14: fetch
def fetch(self):
    """Fetch a mynextcollege.com forum index, forum listing or thread.

    Sample uri:
    http://www.mynextcollege.com/college-reviews/discussion-room-f6.html

    The ``-sid=`` suffix is stripped from the uri first.

    * root uri (``.../college-reviews/``): loads the soup and calls
      ``__addFortumLinks``.
    * uri ending in ``-f<digits>.html``: reads the cap from the
      ``mynextcollege_numresults`` Connector config key, calls
      ``__getThreads`` per page and updates the session info when
      ``self.linksOut`` is non-empty.
    * anything else: treated as a thread; calls ``__getParentPage`` once
      and ``__addPosts`` per page.

    Returns:
        True when the listing or thread branch completes; False when the
        soup cannot be set or an exception is raised.
    """
    self.genre = "Review"
    site_root = 'http://www.mynextcollege.com/college-reviews'
    try:
        self.parent_uri = self.currenturi
        self.currenturi = self.currenturi.split('-sid=')[0]
        if self.currenturi == 'http://www.mynextcollege.com/college-reviews/':
            try:
                if not self.__setSoup():
                    return False
                self.__addFortumLinks()
            except Exception:
                log.info(self.log_msg('cannot add tasks'))
                return False
            # NOTE(review): control falls through to the checks below, so
            # the root page is also processed as a thread -- confirm this
            # is intended.
        if re.match(r'.*?\-f\d+\.html$', self.currenturi):
            self.total_posts_count = 0
            self.last_timestamp = datetime(1980, 1, 1)
            self.max_posts_count = int(tg.config.get(
                path='Connector', key='mynextcollege_numresults'))
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            while True:
                if not self.__getThreads():
                    break
                try:
                    next_href = self.soup.find(
                        'a', text='Next').parent['href']
                    # Drop the href's first character and any '-sid='
                    # suffix before joining it to the site root.
                    self.currenturi = (site_root
                                       + next_href[1:].split('-sid=')[0])
                    if not self.__setSoup():
                        break
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            if self.linksOut:
                updateSessionInfo('Search', self.session_info_out,
                                  self.last_timestamp, None,
                                  'ForumThreadsPage',
                                  self.task.instance_data.get('update'))
            return True
        else:
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            self.__getParentPage()
            self.post_type = True
            while True:
                self.__addPosts()
                try:
                    next_href = self.soup.find(
                        'a', text='Next').parent['href']
                    self.currenturi = (site_root
                                       + next_href[1:].split('-sid=')[0])
                    if not self.__setSoup():
                        break
                except Exception:
                    log.info(self.log_msg('Next page not set'))
                    break
            return True
    except Exception:
        log.exception(self.log_msg('Exception in fetch'))
        return False
示例15: fetch
def fetch(self):
    """Fetch an ocenbank.pl forum listing or topic page.

    * ``viewforum`` uri: reads the cap from the ``ocean_forum_numresults``
      Connector config key, calls ``__getThreads`` per page and updates
      the session info when ``self.linksOut`` is non-empty.
    * ``viewtopic`` uri: calls ``__getParentPage`` once and ``__addPosts``
      per page.

    In both branches the next page is the anchor inside
    ``<p class="pagelink conl">`` whose text is the next page number,
    starting at 2.

    Returns:
        True when a branch completes; False when the soup cannot be set,
        the uri matches neither form, or an exception is raised.
    """
    self.genre = "Review"
    try:
        self.parent_uri = self.currenturi
        self.base_url = 'http://ocenbank.pl/forum/'
        if self.currenturi.startswith('http://ocenbank.pl/forum/viewforum'):
            self.total_posts_count = 0
            self.last_timestamp = datetime(1980, 1, 1)
            self.max_posts_count = int(tg.config.get(
                path='Connector', key='ocean_forum_numresults'))
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            next_page_no = 2
            while True:
                if not self.__getThreads():
                    break
                try:
                    pager = self.soup.find('p', 'pagelink conl')
                    self.currenturi = self.base_url + pager.find(
                        'a', text=str(next_page_no)).parent['href']
                    if not self.__setSoup():
                        break
                    next_page_no += 1
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            if self.linksOut:
                updateSessionInfo('Search', self.session_info_out,
                                  self.last_timestamp, None,
                                  'ForumThreadsPage',
                                  self.task.instance_data.get('update'))
            return True
        elif self.currenturi.startswith(
                'http://ocenbank.pl/forum/viewtopic'):
            if not self.__setSoup():
                log.info(self.log_msg(
                    'Soup not set , Returning False from Fetch'))
                return False
            self.__getParentPage()
            self.post_type = True
            next_page_no = 2
            while True:
                self.__addPosts()
                try:
                    pager = self.soup.find('p', 'pagelink conl')
                    self.currenturi = self.base_url + pager.find(
                        'a', text=str(next_page_no)).parent['href']
                    if not self.__setSoup():
                        break
                    next_page_no += 1
                except Exception:
                    log.info(self.log_msg('Next Page link not found'))
                    break
            return True
        else:
            log.info(self.log_msg('Wrong url is feeded'))
            log.info(self.log_msg('Hai+' + self.currenturi))
            return False
    except Exception:
        log.exception(self.log_msg('Exception in fetch'))
        return False