本文整理汇总了Python中utils.sessioninfomanager.checkSessionInfo函数的典型用法代码示例。如果您正苦于以下问题：Python checkSessionInfo函数的具体用法？Python checkSessionInfo怎么用？Python checkSessionInfo使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了checkSessionInfo函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __getParentPage
def __getParentPage(self):
    """Build the thread-level parent page and append it to ``self.pages``.

    The breadcrumb links under ``div.CommonBreadCrumbArea`` in ``self.soup``
    supply the thread hierarchy and title, and the last path segment of
    ``self.currenturi`` (minus ``.aspx``) supplies the thread id.  Three
    optional fields are copied from ``self.task.pagedata`` when present.

    Returns:
        True when the page was appended to ``self.pages``.  False when
        ``checkSessionInfo`` returns a truthy value for ``self.parent_uri``,
        when ``updateSessionInfo`` reports ``updated`` as falsy, or when
        building the record raises.
    """
    page = {}
    try:
        crumbs = self.soup.find('div', 'CommonBreadCrumbArea').findAll('a')
        self.hierarchy = page['et_thread_hierarchy'] = [
            stripHtml(crumb.renderContents()) for crumb in crumbs][1:]
        page['title'] = page['et_thread_hierarchy'][-1]
    except Exception:
        # Missing breadcrumb markup is tolerated; the page just gets no title.
        log.info(self.log_msg('Thread hierarchy is not found'))
        page['title'] = ''
    try:
        self.thread_id = page['et_thread_id'] = unicode(
            self.currenturi.split('/')[-1].replace('.aspx', ''))
    except Exception:
        log.info(self.log_msg('Thread id not found'))
    if checkSessionInfo(self.genre, self.session_info_out, self.parent_uri,
                        self.task.instance_data.get('update')):
        log.info(self.log_msg('Session info return True, Already exists'))
        return False
    for field in ('et_thread_last_post_author', 'ei_thread_replies_count',
                  'edate_last_post_date'):
        try:
            page[field] = self.task.pagedata[field]
        except Exception:
            log.info(self.log_msg('page data cannot be extracted for %s' % field))
    try:
        # The hash is taken before the bookkeeping fields below are added.
        post_hash = get_hash(page)
        # The task id is only passed while the outgoing session info is empty.
        session_id = self.task.id if self.session_info_out == {} else None
        result = updateSessionInfo(
            self.genre, self.session_info_out, self.parent_uri, post_hash,
            'Forum', self.task.instance_data.get('update'), Id=session_id)
        if not result['updated']:
            return False
        # Read the clock once so pickup_date and posted_date cannot differ.
        now = datetime.strftime(datetime.utcnow(), "%Y-%m-%dT%H:%M:%SZ")
        page['path'] = [self.parent_uri]
        page['parent_path'] = []
        page['uri'] = normalize(self.currenturi)
        page['uri_domain'] = unicode(urlparse.urlparse(page['uri'])[1])
        page['priority'] = self.task.priority
        page['level'] = self.task.level
        page['pickup_date'] = now
        page['posted_date'] = now
        page['connector_instance_log_id'] = self.task.connector_instance_log_id
        page['connector_instance_id'] = self.task.connector_instance_id
        page['workspace_id'] = self.task.workspace_id
        page['client_id'] = self.task.client_id
        page['client_name'] = self.task.client_name
        page['last_updated_time'] = page['pickup_date']
        page['versioned'] = False
        page['data'] = ''
        page['task_log_id'] = self.task.id
        page['entity'] = 'Post'
        page['category'] = self.task.instance_data.get('category', '')
        self.pages.append(page)
        log.info(page)
        log.info(self.log_msg('Parent Page added'))
        return True
    except Exception:
        log.exception(self.log_msg("parent post couldn't be parsed"))
        return False
示例2: __setParentPage
def __setParentPage(self):
    """Build the thread-level parent page and append it to ``self.pages``.

    The text of ``div.deck breadcrumbs`` in ``self.soup`` is split on ``>``;
    everything after the first crumb becomes the thread hierarchy and the
    last crumb becomes the title.  Nothing is added when the breadcrumb is
    missing, when ``checkSessionInfo`` returns a truthy value for the task
    uri, or when ``updateSessionInfo`` reports ``updated`` as falsy.

    Returns:
        None in every case; the outcome is visible only through
        ``self.pages`` and the log.
    """
    page = {}
    try:
        crumb_text = stripHtml(
            self.soup.find('div', 'deck breadcrumbs').renderContents())
        page['et_thread_hierarchy'] = self.__hierarchy = [
            part.strip() for part in crumb_text.split('>') if part.strip()][1:]
        page['data'] = page['title'] = page['et_thread_hierarchy'][-1]
    except Exception:
        log.exception(self.log_msg(
            'Thread hierarchy and Title Not found for uri%s' % self.currenturi))
        return
    if checkSessionInfo(self.__genre, self.session_info_out,
                        self.task.instance_data['uri'],
                        self.task.instance_data.get('update')):
        log.info(self.log_msg('Session info return True, Already exists'))
        return
    try:
        result = updateSessionInfo(
            'review', self.session_info_out, self.task.instance_data['uri'],
            get_hash(page), 'forum', self.task.instance_data.get('update'))
        if result['updated']:
            page['path'] = [self.task.instance_data['uri']]
            page['parent_path'] = []
            page['uri'] = self.currenturi
            page['uri_domain'] = unicode(urlparse.urlparse(page['uri'])[1])
            # The title was only needed for the hash; the stored body is empty.
            page['data'] = ''
            page['entity'] = 'thread'
            page.update(self.__task_elements_dict)
            page['posted_date'] = page['pickup_date']
            self.pages.append(page)
            log.info(self.log_msg('Parent Page Added'))
        else:
            # The old message had no %s placeholder, so the % operator raised
            # TypeError here; it also said "True" on the not-updated path.
            log.info(self.log_msg(
                'Result[updated] returned False for uri %s' % self.currenturi))
    except Exception:
        log.exception(self.log_msg("parent post couldn't be parsed"))
示例3: __addPost
def __addPost(self, post):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the hash of the post's ``data`` field, scoped under
    ``self.currenturi`` as the parent.

    Args:
        post: the post element handed to ``self.__getData``.

    Returns:
        False when ``checkSessionInfo`` returns a truthy value for the key,
        or when an exception is raised.  True otherwise: page added, empty
        page, or ``updateSessionInfo`` reporting ``updated`` as falsy.
    """
    try:
        page = self.__getData(post)
        if not page:
            return True
        unique_key = get_hash({'data': page['data']})
        update = self.task.instance_data.get('update')
        if checkSessionInfo('review', self.session_info_out, unique_key,
                            update, parent_list=[self.currenturi]):
            log.info(self.log_msg('Session info returns True'))
            return False
        result = updateSessionInfo(
            'review', self.session_info_out, unique_key, get_hash(page),
            'Review', self.task.instance_data.get('update'),
            parent_list=[self.currenturi])
        if not result['updated']:
            log.info(self.log_msg('Update session info returns False'))
            return True
        page['path'] = [self.currenturi]
        page['parent_path'] = []
        page['uri'] = self.currenturi
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        page['entity'] = 'post'
        page.update(self.__task_elements_dict)
        self.pages.append(page)
        log.info(page)
        log.info(self.log_msg('Post Added'))
        return True
    except Exception:
        # Catch-all boundary: log and report failure, but let
        # KeyboardInterrupt/SystemExit propagate.
        log.exception(self.log_msg('Error while adding session info'))
        return False
示例4: __getThreads
def __getThreads(self):
    """Collect thread links from the current listing page.

    Each ``span.topictitle`` in ``self.soup`` is mapped to its enclosing
    ``tr``.  For every row the last cell's first text line is parsed as the
    thread time, and the thread link is appended to
    ``self.__links_to_process``.  Rows whose date or link cannot be read are
    logged and skipped.

    Returns:
        True when every row on the page was visited.  False when the page
        has no threads, when ``self.__max_threads_count`` is exceeded, or
        when ``checkSessionInfo`` returns a truthy value for a thread time.
    """
    threads = [title.findParent('tr')
               for title in self.soup.findAll('span', 'topictitle')]
    if not threads:
        log.info(self.log_msg('No threads are found for url %s' % self.currenturi))
        return False
    for thread in threads:
        self.__total_threads_count += 1
        if self.__total_threads_count > self.__max_threads_count:
            log.info(self.log_msg(
                'Reaching maximum post,Return false from the url %s'
                % self.currenturi))
            return False
        try:
            last_cell = thread.findAll('td')[-1]
            date_str = stripHtml(last_cell.renderContents()).splitlines()[0].strip()
            thread_time = datetime.strptime(date_str, '%a %b %d, %Y %I:%M %p')
        except Exception:
            log.exception(self.log_msg(
                'Cannot fetch the date for the url%s' % self.currenturi))
            continue
        if checkSessionInfo('Search', self.session_info_out, thread_time,
                            self.task.instance_data.get('update')):
            log.info(self.log_msg(
                'Session info Returns True for url %s' % self.currenturi))
            return False
        self.__last_timestamp = max(thread_time, self.__last_timestamp)
        try:
            href = thread.find('a', 'topictitle')['href']
            self.__links_to_process.append(self.__removeSessionId(
                'http://www.blackberryblast.com/forums/' + href))
        except Exception:
            log.exception(self.log_msg(
                'Cannot find the thread url in the uri %s' % self.currenturi))
    return True
示例5: __addPosts
def __addPosts(self, links, parent_list):
    """Fetch each discussion post in ``links`` and append new ones to ``self.pages``.

    Every link is rewritten to the canonical
    ``http://communities.vmware.com/message/<id>#<id>`` form using the
    ``objectID`` query value, fetched, and parsed with
    ``__extractPostBody``, falling back to ``__extractReplyBody``.  Links
    that fail to fetch or parse are logged and skipped.

    Args:
        links: iterable of post urls containing ``objectID=<digits>``.
        parent_list: path of the parent page; it is no longer modified.

    Returns:
        True once all links have been visited.
    """
    # NOTE(review): this connection object is never used below; it is kept
    # only in case constructing it has side effects -- confirm and remove.
    h = HTTPConnection()
    for link in links:
        try:
            page = {}
            object_id = re.search(r'objectID=(\d+)', link).group(1)
            # Use the redirected url instead of the one from the search page.
            link = "http://communities.vmware.com/message/%s#%s" % (object_id, object_id)
            self.currenturi = link
            page['uri'] = normalize(link)
            log.debug(self.log_msg("Fetching the post url %s" % (self.currenturi)))
            if checkSessionInfo(self.genre, self.session_info_out, self.currenturi,
                                self.task.instance_data.get('update'),
                                parent_list=parent_list):
                # Already known; no need to pick this page.
                continue
            res = self._getHTML()
            self.rawpage = res['result']
            self._setCurrentPage()
            # Try the post body first, then fall back to the replies.
            if not self.__extractPostBody(page, object_id):
                self.__extractReplyBody(page, object_id)
        except Exception:
            log.exception(self.log_msg("exception in extracting page"))
            continue
        page['posted_date'] = datetime.datetime.strftime(
            page['posted_date'], "%Y-%m-%dT%H:%M:%SZ")
        checksum = md5.md5(
            ''.join(sorted(page.values())).encode('utf-8', 'ignore')).hexdigest()
        # The task id is only passed while the outgoing session info is empty.
        session_id = self.task.id if self.session_info_out == {} else None
        result = updateSessionInfo(
            self.genre, self.session_info_out, self.currenturi, checksum, 'Post',
            self.task.instance_data.get('update'),
            parent_list=parent_list, Id=session_id)
        if result['updated']:
            # Copy instead of aliasing: the old code bound path, parent_path
            # and the caller's parent_list to one list, so the append below
            # changed all three and the list grew on every iteration.
            page['parent_path'] = list(parent_list)
            page['path'] = list(parent_list) + [self.currenturi]
            page['priority'] = self.task.priority
            page['level'] = self.task.level
            page['pickup_date'] = datetime.datetime.strftime(
                datetime.datetime.utcnow(), "%Y-%m-%dT%H:%M:%SZ")
            page['connector_instance_log_id'] = self.task.connector_instance_log_id
            page['connector_instance_id'] = self.task.connector_instance_id
            page['workspace_id'] = self.task.workspace_id
            page['client_id'] = self.task.client_id  # TODO: Get the client from the project
            page['client_name'] = self.task.client_name
            page['last_updated_time'] = page['pickup_date']
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
    return True
示例6: __getSearchResults
def __getSearchResults(self):
    """Turn the search results on the current page into follow-up tasks.

    For every ``dl.postprofile`` in ``self.soup`` the first ``dd`` is parsed
    as the thread time.  Results whose time cannot be parsed, or for which
    ``checkSessionInfo`` returns a truthy value, are skipped.  Otherwise a
    clone of ``self.task`` pointing at the result's link is appended to
    ``self.linksOut``.

    Returns:
        True when the whole result list was visited.  False when
        ``self.max_posts_count`` is reached or the results cannot be read;
        the latter previously fell through and returned None.
    """
    try:
        for result in self.soup.findAll('dl', 'postprofile'):
            try:
                if self.total_posts_count >= self.max_posts_count:
                    log.info(self.log_msg('Reaching maximum post,Return false'))
                    return False
                self.total_posts_count = self.total_posts_count + 1
                date_str = stripHtml(result.find('dd').renderContents())
                try:
                    thread_time = datetime.strptime(date_str, '%Y-%m-%d, %H:%M')
                except Exception:
                    log.info(self.log_msg('Cannot find the thread time, task not added '))
                    continue
                # The old extra test `max_posts_count >= total_posts_count`
                # was always true after the guard above, so it is dropped.
                if checkSessionInfo('search', self.session_info_out, thread_time,
                                    self.task.instance_data.get('update')):
                    log.info(self.log_msg('Session info return True'))
                    continue
                self.last_timestamp = max(thread_time, self.last_timestamp)
                temp_task = self.task.clone()
                temp_task.instance_data['uri'] = result.findAll('dd')[-3].find('a')['href']
                log.info('taskAdded')
                self.linksOut.append(temp_task)
            except Exception:
                log.exception(self.log_msg('task not added'))
        return True
    except Exception:
        # Keep the traceback and return an explicit falsy value.
        log.exception(self.log_msg('cannot get the search results'))
        return False
示例7: __getParentPage
def __getParentPage(self, comment):
    """Build the parent page from a comment element and append it to ``self.pages``.

    Reads the ``totalreplies``, ``name``, ``dateadded`` and ``messageid``
    children of ``comment``.  The reply count is also stored on
    ``self.__total_replies_count``.

    Args:
        comment: parsed element exposing ``find(<tag>)`` for the four
            children above.

    Returns:
        None in every case; the outcome is visible only through
        ``self.pages`` and the log.
    """
    page = {}
    try:
        self.__total_replies_count = page['ei_data_replies_count'] = int(
            stripHtml(comment.find('totalreplies').renderContents()))
        page['title'] = page['data'] = stripHtml(comment.find('name').renderContents())
        # Keep only the part of the timestamp before the first '.'.
        page['posted_date'] = stripHtml(
            comment.find('dateadded').renderContents()).split('.')[0]
        unique_key = stripHtml(comment.find('messageid').renderContents())
        if checkSessionInfo(self.__genre, self.session_info_out,
                            self.task.instance_data['uri'],
                            self.task.instance_data.get('update')):
            log.info(self.log_msg('Session info return True, Already exists'))
            return
        result = updateSessionInfo(
            'review', self.session_info_out, self.task.instance_data['uri'],
            get_hash(page), 'forum', self.task.instance_data.get('update'))
        if result['updated']:
            page['path'] = [unique_key]
            page['parent_path'] = []
            page['uri'] = self.currenturi
            page['uri_domain'] = unicode(urlparse.urlparse(page['uri'])[1])
            page['entity'] = 'post'
            page.update(self.__task_elements_dict)
            log.info(page['data'])
            self.pages.append(page)
        else:
            # The old message had no %s placeholder, so the % operator raised
            # TypeError and the handler below logged a misleading
            # "Hierachy/Title not found"; it also said "True" on this path.
            log.info(self.log_msg(
                'Result[updated] returned False for uri %s' % self.currenturi))
    except Exception:
        log.exception(self.log_msg(
            'Hierachy/Title not found in url %s' % self.currenturi))
示例8: __addPost
def __addPost(self, post, is_question=False):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the ``name`` attribute of the first named anchor in
    ``post``, scoped under the task uri as the parent.

    Args:
        post: the post tag to read.
        is_question: forwarded unchanged to ``self.__getData``.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        unique_key = post.find('a', attrs={'name': True})['name']
        permalink = self.currenturi + '#' + unique_key
        task_uri = self.task.instance_data['uri']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[task_uri]):
            log.info(self.log_msg('Session info returns True for uri %s' % permalink))
            return False
        page = self.__getData(post, is_question, unique_key)
        if not page:
            log.info(self.log_msg(
                'page contains empty data, getdata returns False for uri %s'
                % self.currenturi))
            return True
        result = updateSessionInfo(
            self.__genre, self.session_info_out, unique_key, get_hash(page),
            'forum', self.task.instance_data.get('update'),
            parent_list=[task_uri])
        if result['updated']:
            page['parent_path'] = [task_uri]
            page['path'] = [task_uri, unique_key]
            # page['uri'] is expected to have been set by __getData.
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg(
            'Cannot add the post for the uri %s' % self.currenturi))
    return True
示例9: __addPost
def __addPost(self, post, is_question=False):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the ``name`` attribute of the anchor inside the
    post's ``span.name``, scoped under the task uri as the parent.

    Args:
        post: the post tag to read.
        is_question: forwarded unchanged to ``self.__getData``.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        unique_key = post.find('span', attrs={'class': 'name'}).find('a')['name']
        task_uri = self.task.instance_data['uri']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[task_uri]):
            log.info(self.log_msg('Session info returns True for %s' % unique_key))
            return False
        page = self.__getData(post, is_question)
        log.info(self.log_msg('page'))
        if not page:
            log.info(self.log_msg(
                'page contains empty data __getData returns False for uri %s'
                % self.currenturi))
            return True
        result = updateSessionInfo(
            self.__genre, self.session_info_out, unique_key, get_hash(page),
            'forum', self.task.instance_data.get('update'),
            parent_list=[task_uri])
        if result['updated']:
            page['parent_path'] = [task_uri]
            page['path'] = [task_uri, unique_key]
            page['uri'] = self.currenturi
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg(
            'Cannot add the post for the uri %s' % self.currenturi))
    return True
示例10: __fetchTopic
def __fetchTopic(self):
    """Fetch the current uri and register it in the session info.

    Downloads ``self.currenturi`` with an empty ``Accept-encoding`` header
    and an English ``Accept-Language`` header, stores the body in
    ``self.rawpage`` and calls ``self._setCurrentPage()``.  When
    ``checkSessionInfo`` returns a falsy value for the task uri,
    ``updateSessionInfo`` is called with the current uri as the hash value.

    Returns:
        True on success, whether or not the session info was updated.
        False when any step raises.
    """
    try:
        headers = {
            'Accept-encoding': '',
            'Accept-Language': 'en-US,en;q=0.8',
        }
        res = self._getHTML(self.currenturi, headers=headers)
        self.rawpage = res['result']
        self._setCurrentPage()
        # The uri itself is the "hash" of the parent page.  The old inner
        # try/except around this assignment could only fire on a missing
        # attribute, which the handler below covers with the same False.
        post_hash = self.currenturi
        if not checkSessionInfo(self.genre, self.session_info_out,
                                self.task.instance_data['uri'],
                                self.task.instance_data.get('update')):
            session_id = None
            if self.session_info_out == {}:
                session_id = self.task.id
                log.debug('got the connector instance first time, sending updatesessioninfo the id : %s' % str(session_id))
            updateSessionInfo(
                self.genre, self.session_info_out, self.task.instance_data['uri'],
                post_hash, 'Post', self.task.instance_data.get('update'),
                Id=session_id)
        return True
    except Exception:
        log.exception(self.log_msg("Error occured while processing %s" % (self.currenturi)))
        return False
示例11: __processRSSFeeds
def __processRSSFeeds(self):
    """Parse the RSS feed at ``self.currenturi`` and queue a task per new entry.

    For each feed entry whose link is not yet known to the session info, a
    clone of ``self.task`` is pointed at the normalized link, tagged with
    the entry title and fixed source metadata, and appended to
    ``self.linksOut``.  Entries that raise are logged and skipped.

    Returns:
        False when the feed could not be parsed (empty result or no
        detected feed version), True otherwise.
    """
    log.debug(self.log_msg("Entry Webpage: " + str(self.currenturi)))
    parser = feedparser.parse(self.currenturi)
    # Test the parse result itself first: the old order read parser.version
    # before `not parser`, so the emptiness check could never protect it.
    if not parser or not getattr(parser, 'version', ''):
        log.info(self.log_msg('parser version not found , returning'))
        return False
    log.info('number of entries %s' % (len(parser.entries)))
    for entity in parser.entries:
        try:
            entry_link = entity['link']
            if checkSessionInfo('Review', self.session_info_out, entry_link,
                                self.task.instance_data.get('update')):
                log.info(self.log_msg('Session info returns True for uri %s' % entry_link))
                continue
            result = updateSessionInfo(
                'Review', self.session_info_out, entry_link, '', 'Post',
                self.task.instance_data.get('update'))
            if not result['updated']:
                log.info(self.log_msg('Result not updated for uri %s' % entry_link))
                continue
            temp_task = self.task.clone()
            temp_task.instance_data['uri'] = normalize(entry_link)
            temp_task.pagedata['title'] = entity['title']
            temp_task.pagedata['source'] = 'facebook.com'
            temp_task.instance_data['connector_name'] = 'HTMLConnector'
            temp_task.pagedata['source_type'] = 'rss'
            self.linksOut.append(temp_task)
        except Exception:
            log.exception(self.log_msg("exception in adding temptask to linksout"))
    return True
示例12: __addPost
def __addPost(self, post, is_question=False):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the ``name`` attribute of the post's first anchor
    with the ``Post`` prefix removed, scoped under the task uri.

    Args:
        post: the post tag to read.
        is_question: forwarded unchanged to ``self.__getData``.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        unique_key = post.find('a')['name'].replace('Post', '')
        log.debug(self.log_msg('POST: ' + str(unique_key)))
        task_uri = self.task.instance_data['uri']
        if checkSessionInfo('review', self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[task_uri]):
            log.info(self.log_msg('Session info returns True for uri %s' % unique_key))
            return False
        page = self.__getData(post, is_question)
        if not page:
            return True
        result = updateSessionInfo(
            'review', self.session_info_out, unique_key, get_hash(page),
            'forum', self.task.instance_data.get('update'),
            parent_list=[task_uri])
        if result['updated']:
            page['path'] = [task_uri, unique_key]
            page['parent_path'] = [task_uri]
            page['uri'] = self.currenturi + '#' + unique_key
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
            log.info(self.log_msg('Page added'))
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg(
            'Cannot add the post for the uri %s' % self.currenturi))
    return True
示例13: __addPosts
def __addPosts(self, post):
    """Extract one review and append it to ``self.pages`` if it is new.

    The session key is the part of ``post['id']`` after the last
    underscore.  No parent list is passed to the session-info helpers.

    Args:
        post: the post tag; it must expose an ``id`` attribute.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        unique_key = post['id'].split('_')[-1]
        if checkSessionInfo('review', self.session_info_out, unique_key,
                            self.task.instance_data.get('update')):
            log.info(self.log_msg(
                'Session info returns True for uri %s' % self.currenturi))
            return False
        page = self.__getData(post)
        if not page:
            return True
        result = updateSessionInfo(
            'review', self.session_info_out, unique_key, get_hash(page),
            'review', self.task.instance_data.get('update'))
        if result['updated']:
            page['path'] = [self.currenturi, unique_key]
            page['parent_path'] = []
            # Only build a permalink when __getData did not supply a uri.
            if not page.get('uri'):
                page['uri'] = self.currenturi + '#' + unique_key
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page['entity'] = 'review'
            page.update(self.__task_elements_dict)
            self.pages.append(page)
            log.info(self.log_msg('Page added'))
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg(
            'Cannot add the post for the uri %s' % self.currenturi))
    return True
示例14: __addPost
def __addPost(self, post, is_question=False):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the hash of the post's ``data`` and ``title``, so
    the data has to be extracted before the session check.  The key is
    scoped under the task uri as the parent.

    Args:
        post: the post tag to read.
        is_question: forwarded unchanged to ``self.__getData``.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        page = self.__getData(post, is_question)
        if not page:
            log.info(self.log_msg('No data found in url %s' % self.currenturi))
            return True
        unique_key = get_hash({'data': page['data'], 'title': page['title']})
        task_uri = self.task.instance_data['uri']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[task_uri]):
            log.info(self.log_msg(
                'Session info returns True for uri %s' % self.currenturi))
            return False
        result = updateSessionInfo(
            self.__genre, self.session_info_out, unique_key, get_hash(page),
            'forum', self.task.instance_data.get('update'),
            parent_list=[task_uri])
        if result['updated']:
            page['parent_path'] = [task_uri]
            page['path'] = [task_uri, unique_key]
            # page['uri'] is expected to have been set by __getData.
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg('Cannot add the post in url %s' % self.currenturi))
    return True
示例15: __addPost
def __addPost(self, post, is_original_post=False):
    """Extract one post and append it to ``self.pages`` if it is new.

    The session key is the third whitespace-separated token of the text of
    the post's third ``div.oneLine``, scoped under the task uri.

    Args:
        post: the post tag to read.
        is_original_post: forwarded unchanged to ``self.__get_data``.

    Returns:
        False only when ``checkSessionInfo`` returns a truthy value for the
        key.  True otherwise, including when extraction raises (the error
        is logged).
    """
    try:
        key_line = post.findAll('div', 'oneLine')[2]
        unique_key = stripHtml(str(key_line)).split()[2]
        page = self.__get_data(post, is_original_post, unique_key)
        if not page:
            log.info(self.log_msg(
                'page is empty, __get_data returns False for uri %s'
                % self.currenturi))
            return True
        task_uri = self.task.instance_data['uri']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[task_uri]):
            log.info(self.log_msg('Session info returns True for uri %s' % task_uri))
            return False
        result = updateSessionInfo(
            self.__genre, self.session_info_out, unique_key, get_hash(page),
            'forum', self.task.instance_data.get('update'),
            parent_list=[task_uri])
        if result['updated']:
            page['parent_path'] = [task_uri]
            page['path'] = [task_uri, unique_key]
            # page['uri'] is expected to have been set by __get_data.
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
        else:
            log.info(self.log_msg(
                'Update session info returns False for url %s' % self.currenturi))
    except Exception:
        # Catch-all boundary: log, but let KeyboardInterrupt/SystemExit
        # propagate.
        log.exception(self.log_msg(
            'Cannot add the post for the uri %s' % self.currenturi))
    return True