本文整理汇总了Python中utils.utils.get_hash函数的典型用法代码示例。如果您正苦于以下问题：Python get_hash函数的具体用法？Python get_hash怎么用？Python get_hash使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_hash函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __addPosts
def __addPosts(self):
    """Append one page dict per post found on the current thread page.

    Every div with class 'pBody' is located and its enclosing div is handed
    to __getData.  The first post is tagged "Question" while self.post_type
    is still truthy (the flag is cleared once it has been used); every
    other post is tagged "Suggestion".

    Returns False when the post containers cannot be collected; otherwise
    the method falls off the end and returns None.
    """
    try:
        post_tags = [body.findParent('div')
                     for body in self.soup.findAll('div', 'pBody')]
    except:
        log.exception(self.log_msg('Reviews are not found'))
        return False
    for position, post_tag in enumerate(post_tags):
        if position == 0 and self.post_type:
            kind = "Question"
            self.post_type = False
        else:
            kind = "Suggestion"
        page = self.__getData(post_tag, kind)
        log.info(self.log_msg(page))
        try:
            content_hash = get_hash(page)
            log.info(page)
            # The session key covers only body and title, while the hash
            # handed to updateSessionInfo covers the whole page dict.
            unique_key = get_hash({'data': page['data'],
                                   'title': page['title']})
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.parent_uri]):
                log.info(self.log_msg('session info return True'))
                continue
            outcome = updateSessionInfo(self.genre, self.session_info_out,
                                        unique_key, content_hash, 'Thread',
                                        self.task.instance_data.get('update'),
                                        parent_list=[self.parent_uri])
            if not outcome['updated']:
                log.info(self.log_msg('result not updated'))
                continue
            # parent_path and path are two independent lists.
            page['parent_path'] = [self.parent_uri]
            page['path'] = [self.parent_uri, unique_key]
            # Fields copied verbatim from the task under the same name.
            for field in ('priority', 'level', 'connector_instance_log_id',
                          'connector_instance_id', 'workspace_id',
                          'client_id', 'client_name'):
                page[field] = getattr(self.task, field)
            picked_up = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            page['pickup_date'] = picked_up
            page['last_updated_time'] = picked_up
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            page['uri'] = self.currenturi
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except:
            log.exception(self.log_msg('Error while adding session info'))
示例2: __addPost
def __addPost(self, post,is_question=False):
    '''Convert one post tag into a page dict and queue it on self.pages.

    Returns False only when checkSessionInfo gives a truthy answer for the
    post.  Every other outcome - empty page, page stored, session update
    refused, or an error (which is logged) - returns True.
    '''
    try:
        page = self.__getData(post, is_question)
        if not page:
            log.info(self.log_msg('No data found in url %s' % self.currenturi))
            return True
        # The session key is derived from the body and the title only.
        unique_key = get_hash({'data': page['data'], 'title': page['title']})
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[self.task.instance_data['uri']]):
            log.info(self.log_msg('Session info returns True for uri %s'
                                  % self.currenturi))
            return False
        outcome = updateSessionInfo(self.__genre, self.session_info_out,
                                    unique_key, get_hash(page), 'forum',
                                    self.task.instance_data.get('update'),
                                    parent_list=[self.task.instance_data['uri']])
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            return True
        thread_uri = self.task.instance_data['uri']
        page['parent_path'] = [thread_uri]
        page['path'] = [thread_uri, unique_key]
        # page['uri'] is not set here; it must already be present in the
        # dict returned by __getData.
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        page.update(self.__task_elements_dict)
        self.pages.append(page)
    except:
        log.exception(self.log_msg('Cannot add the post in url %s' % self.currenturi))
    return True
示例3: __addPost
def __addPost(self, post):
    """Turn a post tag into a page dict and append it to self.pages.

    The hash of the whole page dict is used both as the session key and as
    the content hash passed to updateSessionInfo.

    Returns False only when checkSessionInfo gives a truthy answer; an
    empty page, a stored page, a refused session update and a logged error
    all return True.
    """
    try:
        page = self.__getData(post)
        if not page:
            log.info(self.log_msg('page contains empty data, getdata returns False for uri %s'
                                  % self.currenturi))
            return True
        unique_key = get_hash(page)
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update')):
            log.info(self.log_msg('Session info returns True for uri %s' % unique_key))
            return False
        outcome = updateSessionInfo(self.__genre, self.session_info_out,
                                    unique_key, get_hash(page), 'forum',
                                    self.task.instance_data.get('update'))
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            return True
        # This post has no parent entry: the path holds just its own key.
        page['parent_path'] = []
        page['path'] = [unique_key]
        page['uri'] = self.currenturi
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        log.info(page)
        page.update(self.__task_elements_dict)
        self.pages.append(page)
    except:
        log.exception(self.log_msg('Cannot add the post for the uri %s' % self.currenturi))
    return True
示例4: __addPost
def __addPost(self, post):
    '''Build the page dict for one post and store it in self.pages.

    Returns True when the post yields no data, when updateSessionInfo
    reports nothing updated, or when the page was stored.  Returns False
    when checkSessionInfo gives a truthy answer or when an error occurs
    (the error is logged).
    '''
    try:
        page = self.__getData(post)
        if not page:
            return True
        # Only the post body takes part in the session key.
        unique_key = get_hash({'data': page['data']})
        already_known = checkSessionInfo('review', self.session_info_out,
                                         unique_key,
                                         self.task.instance_data.get('update'),
                                         parent_list=[self.currenturi])
        if already_known:
            log.info(self.log_msg('Session info returns True'))
            return False
        outcome = updateSessionInfo('review', self.session_info_out,
                                    unique_key, get_hash(page), 'Review',
                                    self.task.instance_data.get('update'),
                                    parent_list=[self.currenturi])
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False'))
            return True
        current = self.currenturi
        page.update({
            'path': [current],
            'parent_path': [],
            'uri': current,
            'uri_domain': urlparse.urlparse(current)[1],
            'entity': 'post',
        })
        page.update(self.__task_elements_dict)
        self.pages.append(page)
        log.info(page)
        log.info(self.log_msg('Post Added'))
        return True
    except:
        log.exception(self.log_msg('Error while adding session info'))
        return False
示例5: __addPosts
def __addPosts(self):
    """Append one page dict per post container on the current page.

    Post containers are the divs whose id matches '^edit.*?'.  The first
    one is tagged "Question" while self.post_type is still truthy (the flag
    is cleared once used); all others are tagged "Suggestion".

    Returns False when no containers are found or the lookup fails;
    otherwise the method falls off the end and returns None.
    """
    try:
        post_tags = self.soup.findAll('div', id=re.compile('^edit.*?'))
        if not post_tags:
            log.info(self.log_msg('No reviews found'))
            return False
    except:
        log.exception(self.log_msg('Reviews are not found'))
        return False
    for position, post_tag in enumerate(post_tags):
        if position == 0 and self.post_type:
            kind = "Question"
            self.post_type = False
        else:
            kind = "Suggestion"
        page = self.__getData(post_tag, kind)
        if not page:
            log.info(self.log_msg('no page is sent back'))
            continue
        try:
            content_hash = get_hash(page)
            # The session key covers only body and title.
            unique_key = get_hash({'data': page['data'],
                                   'title': page['title']})
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.parent_uri]):
                continue
            outcome = updateSessionInfo(self.genre, self.session_info_out,
                                        unique_key, content_hash, 'Review',
                                        self.task.instance_data.get('update'),
                                        parent_list=[self.parent_uri])
            if not outcome['updated']:
                continue
            page['parent_path'] = [self.parent_uri]
            page['path'] = [self.parent_uri, unique_key]
            # Fields copied verbatim from the task under the same name.
            for field in ('priority', 'level', 'connector_instance_log_id',
                          'connector_instance_id', 'workspace_id',
                          'client_id', 'client_name'):
                page[field] = getattr(self.task, field)
            picked_up = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            page['pickup_date'] = picked_up
            page['last_updated_time'] = picked_up
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            # page['uri'] is not assigned here; it must already be present
            # in the dict returned by __getData.
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except:
            log.exception(self.log_msg('Error while adding session info'))
示例6: __addPosts
def __addPosts(self):
    """Append one page dict per message of the current thread page.

    The markup of the 'Frm_MsgTable' table is split on the
    '<!-- Start Message head -->' marker and every chunk after the first
    is re-parsed as its own soup.  The first message is tagged "Question"
    while self.post_type is still truthy (the flag is cleared once used);
    all others are tagged "Suggestion".

    Returns False when the message table cannot be split; otherwise the
    method falls off the end and returns None.
    """
    try:
        table_markup = self.soup.find('table', 'Frm_MsgTable').__str__()
        chunks = table_markup.split('<!-- Start Message head -->')[1:]
        messages = [BeautifulSoup(chunk) for chunk in chunks]
    except:
        log.exception(self.log_msg('Reviews are not found'))
        return False
    # Not guarded by a try: a message without a named anchor raises here.
    log.info([message.find('a')['name'] for message in messages])
    for position, message in enumerate(messages):
        if position == 0 and self.post_type:
            kind = "Question"
            self.post_type = False
        else:
            kind = "Suggestion"
        page = self.__getData(message, kind)
        if not page:
            log.info(self.log_msg('Todays Post , so, continue with other post'))
            continue
        try:
            content_hash = get_hash(page)
            # The session key covers only body and title.
            unique_key = get_hash({'data': page['data'],
                                   'title': page['title']})
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.parent_uri]):
                continue
            outcome = updateSessionInfo(self.genre, self.session_info_out,
                                        unique_key, content_hash, 'Review',
                                        self.task.instance_data.get('update'),
                                        parent_list=[self.parent_uri])
            if not outcome['updated']:
                continue
            page['parent_path'] = [self.parent_uri]
            page['path'] = [self.parent_uri, unique_key]
            # Fields copied verbatim from the task under the same name.
            for field in ('priority', 'level', 'connector_instance_log_id',
                          'connector_instance_id', 'workspace_id',
                          'client_id', 'client_name'):
                page[field] = getattr(self.task, field)
            picked_up = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            page['pickup_date'] = picked_up
            page['last_updated_time'] = picked_up
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            # Keep a uri supplied by __getData; fall back to the thread uri.
            page['uri'] = page.get('uri', self.parent_uri)
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except:
            log.exception(self.log_msg('Error while adding session info'))
示例7: __addPosts
def __addPosts(self):
    """Append one page dict per 'tblTitle' table on the current page.

    The first table is tagged "Question" while self.post_type is still
    truthy (the flag is cleared once used); all others are tagged
    "Suggestion".  Posts whose data or session key cannot be obtained are
    skipped with an info log.

    Returns False when the tables cannot be collected; otherwise the
    method falls off the end and returns None.
    """
    try:
        post_tags = self.soup.findAll('table', id='tblTitle')
    except:
        log.exception(self.log_msg('Reviews are not found'))
        return False
    for position, post_tag in enumerate(post_tags):
        if position == 0 and self.post_type:
            kind = "Question"
            self.post_type = False
        else:
            kind = "Suggestion"
        # Stage 1: extract the data and ask whether the key is known.
        try:
            page = self.__getData(post_tag, kind)
            unique_key = get_hash({'data': page['data'],
                                   'title': page['title']})
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.parent_uri]):
                log.info(self.log_msg('Session info returns True'))
                continue
        except:
            log.info(self.log_msg('unique key not found'))
            continue
        # Stage 2: record the post and complete the page dict.
        try:
            outcome = updateSessionInfo(self.genre, self.session_info_out,
                                        unique_key, get_hash(page), 'Review',
                                        self.task.instance_data.get('update'),
                                        parent_list=[self.parent_uri])
            if not outcome['updated']:
                continue
            page['parent_path'] = [self.parent_uri]
            page['path'] = [self.parent_uri, unique_key]
            # Fields copied verbatim from the task under the same name.
            for field in ('priority', 'level', 'connector_instance_log_id',
                          'connector_instance_id', 'workspace_id',
                          'client_id', 'client_name'):
                page[field] = getattr(self.task, field)
            picked_up = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            page['pickup_date'] = picked_up
            page['last_updated_time'] = picked_up
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            page['uri'] = self.currenturi
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except:
            log.exception(self.log_msg('Error while adding session info'))
示例8: __setParentPage
def __setParentPage(self):
    """Build the page dict for the thread's main post and queue it.

    Title and body are scraped from the current soup.  The posted date is
    parsed from the 'brdSubHd blue' div and falls back to the current UTC
    time when it cannot be parsed.  Selected values from self.task.pagedata
    are copied over when they are truthy.

    Returns:
        True when the page was appended to self.pages, and also when
        updateSessionInfo reports that nothing was updated.  False when
        the title/body cannot be scraped, when checkSessionInfo gives a
        truthy answer, or when an error occurs while storing the session
        info (the error is logged).
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    page = {}
    try:
        heading = self.soup.find('div', 'brdSubHd grey top botOne')
        page['title'] = stripHtml(heading.renderContents()).split('replies')[-1].strip()
        body = self.soup.find('div', 'mbPanel clearPanel')
        page['data'] = stripHtml(body.renderContents())
        try:
            date_str = stripHtml(self.soup.find('div', 'brdSubHd blue').renderContents()).split('on')[-1].strip()
            posted = datetime.strptime(date_str, '%d/%m/%y at %I:%M %p')
            page['posted_date'] = posted.strftime(timestamp_format)
        except Exception:
            log.exception(self.log_msg('Posted date not found'))
            page['posted_date'] = datetime.utcnow().strftime(timestamp_format)
    except Exception:
        log.exception(self.log_msg('main page title not found'))
        return False
    unique_key = get_hash({'title': page['title'], 'data': page['data']})
    if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                        self.task.instance_data.get('update')):
        log.info(self.log_msg('Session info returns True for uri %s'
                              % self.currenturi))
        return False
    # Copy thread-level values handed over with the task; missing or
    # falsy values are left out of the page.
    for key in ('et_first_author_name', 'ei_thread_replies_count',
                'edate_last_post_date'):
        value = self.task.pagedata.get(key)
        if value:
            page[key] = value
    try:
        result = updateSessionInfo(self.genre, self.session_info_out,
                                   unique_key, get_hash(page), 'Review',
                                   self.task.instance_data.get('update'))
        if not result['updated']:
            # No exception is active here, so this is logged with info()
            # rather than exception(), which would attach an empty traceback.
            log.info(self.log_msg('Update session info returns False'))
            return True
        # Two separate lists, so a later in-place change to one of them
        # cannot leak into the other.
        page['parent_path'] = [self.task.instance_data['uri']]
        page['path'] = [self.task.instance_data['uri']]
        page['uri'] = self.currenturi
        page['entity'] = 'Review'
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        page.update(self.__task_elements_dict)
        self.pages.append(page)
        log.info(self.log_msg('Post Added'))
        return True
    except Exception:
        log.exception(self.log_msg('Error while adding session info'))
        return False
示例9: __addPosts
def __addPosts(self):
    """Append one page dict per 'wrapper_comment' div on the current page.

    The first comment is tagged "Question", all others "Suggestion".  The
    session key of a comment is the 'ReplyToPostID' query parameter of its
    'Reply' link.  Comments for which the key or the data cannot be
    obtained are skipped with an info log.

    Returns False when the comment containers cannot be collected;
    otherwise the method falls off the end and returns None.
    """
    try:
        comments = self.soup.findAll('div', 'wrapper_comment')
    except:
        log.exception(self.log_msg('Reviews are not found'))
        return False
    for position, comment in enumerate(comments):
        kind = "Question" if position == 0 else "Suggestion"
        # Stage 1: derive the key, ask whether it is known, extract data.
        try:
            nav_box = comment.find('div', 'commentbox_nav')
            reply_anchor = nav_box.find('a', text='Reply').parent
            query = reply_anchor['href'].split('?')[-1]
            unique_key = dict(parse_qsl(query))['ReplyToPostID']
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.parent_uri]):
                log.info(self.log_msg('Session info returns True'))
                continue
            page = self.__getData(comment, kind)
            log.info(page)
        except:
            log.info(self.log_msg('unique key not found'))
            continue
        # Stage 2: record the comment and complete the page dict.
        try:
            outcome = updateSessionInfo(self.genre, self.session_info_out,
                                        unique_key, get_hash(page), 'Review',
                                        self.task.instance_data.get('update'),
                                        parent_list=[self.parent_uri])
            if not outcome['updated']:
                continue
            page['parent_path'] = [self.parent_uri]
            page['path'] = [self.parent_uri, unique_key]
            # Fields copied verbatim from the task under the same name.
            for field in ('priority', 'level', 'connector_instance_log_id',
                          'connector_instance_id', 'workspace_id',
                          'client_id', 'client_name'):
                page[field] = getattr(self.task, field)
            picked_up = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
            page['pickup_date'] = picked_up
            page['last_updated_time'] = picked_up
            page['versioned'] = False
            page['entity'] = 'Review'
            page['category'] = self.task.instance_data.get('category', '')
            page['task_log_id'] = self.task.id
            page['uri'] = self.currenturi
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except:
            log.exception(self.log_msg('Error while adding session info'))
示例10: __getParentPage
def __getParentPage(self):
    """Build the page dict describing the thread itself and queue it.

    The breadcrumb links (all but the first) become the thread hierarchy
    and its last entry the title; the thread id is the last segment of
    the current uri without '.aspx'.  Both are also stored on self.

    Returns True when the page was appended to self.pages.  Returns False
    when checkSessionInfo gives a truthy answer for self.parent_uri, when
    updateSessionInfo reports nothing updated, or when an error occurs
    (the error is logged).
    """
    page = {}
    try:
        breadcrumb = self.soup.find('div', 'CommonBreadCrumbArea')
        crumbs = [stripHtml(link.renderContents())
                  for link in breadcrumb.findAll('a')][1:]
        self.hierarchy = crumbs
        page['et_thread_hierarchy'] = crumbs
        page['title'] = crumbs[-1]
    except:
        log.info(self.log_msg('Thread hierarchy is not found'))
        page['title'] = ''
    try:
        last_segment = self.currenturi.split('/')[-1]
        thread_id = unicode(last_segment.replace('.aspx', ''))
        self.thread_id = thread_id
        page['et_thread_id'] = thread_id
    except:
        log.info(self.log_msg('Thread id not found'))
    if checkSessionInfo(self.genre, self.session_info_out, self.parent_uri,
                        self.task.instance_data.get('update')):
        log.info(self.log_msg('Session info return True, Already exists'))
        return False
    for key in ['et_thread_last_post_author', 'ei_thread_replies_count',
                'edate_last_post_date']:
        try:
            page[key] = self.task.pagedata[key]
        except:
            log.info(self.log_msg('page data cannot be extracted for %s' % key))
    try:
        # The hash is taken before the bookkeeping fields below are added.
        post_hash = get_hash(page)
        # The task id is passed along only while the outgoing session info
        # is still an empty dict.
        task_id = self.task.id if self.session_info_out == {} else None
        outcome = updateSessionInfo(self.genre, self.session_info_out,
                                    self.parent_uri, post_hash, 'Forum',
                                    self.task.instance_data.get('update'),
                                    Id=task_id)
        if not outcome['updated']:
            return False
        page['path'] = [self.parent_uri]
        page['parent_path'] = []
        page['uri'] = normalize(self.currenturi)
        page['uri_domain'] = unicode(urlparse.urlparse(page['uri'])[1])
        # Fields copied verbatim from the task under the same name.
        for field in ('priority', 'level', 'connector_instance_log_id',
                      'connector_instance_id', 'workspace_id',
                      'client_id', 'client_name'):
            page[field] = getattr(self.task, field)
        page['pickup_date'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        page['posted_date'] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
        page['last_updated_time'] = page['pickup_date']
        page['versioned'] = False
        page['data'] = ''
        page['task_log_id'] = self.task.id
        page['entity'] = 'Post'
        page['category'] = self.task.instance_data.get('category', '')
        self.pages.append(page)
        log.info(page)
        log.info(self.log_msg('Parent Page added'))
        return True
    except:
        log.exception(self.log_msg("parent post couldn't be parsed"))
        return False
示例11: __addReviews
def __addReviews(self):
    '''Fetch the reviews on the current page and append them to self.pages.

    A review container is the grandparent div of every span with class
    'ctedit'; its session key is the name of the first anchor inside it.
    Reviews for which checkSessionInfo gives a truthy answer, reviews
    without data and reviews that updateSessionInfo reports as not updated
    are skipped.  An error in one review is logged and does not stop the
    remaining reviews.  The method returns None.
    '''
    reviews = [marker.findParent('div').findParent('div')
               for marker in self.soup.findAll('span', 'ctedit')]
    log.debug(self.log_msg('# Of Reviews found is %d' % len(reviews)))
    for review in reviews:
        try:
            unique_key = review.find('a')['name']
            if checkSessionInfo(self.genre, self.session_info_out, unique_key,
                                self.task.instance_data.get('update'),
                                parent_list=[self.task.instance_data['uri']]):
                log.info(self.log_msg('session info return True in url %s' % self.currenturi))
                continue
            page = self.__getData(review)
            if not page:
                log.info(self.log_msg('No data found in url %s' % self.currenturi))
                continue
            result = updateSessionInfo(self.genre, self.session_info_out,
                                       unique_key, get_hash(page), 'comment',
                                       self.task.instance_data.get('update'),
                                       parent_list=[self.task.instance_data['uri']])
            if not result['updated']:
                log.info(self.log_msg('result not updated'))
                continue
            # parent_path and path must be distinct lists.  Binding both
            # keys to one list and then appending the key made parent_path
            # end with the review's own key as well.
            page['parent_path'] = [self.task.instance_data['uri']]
            page['path'] = [self.task.instance_data['uri'], unique_key]
            page['entity'] = 'comment'
            page['uri'] = self.task.instance_data['uri']
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
            log.info(self.log_msg('Review Added'))
        except Exception:
            # Best effort per review, but KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            log.exception(self.log_msg('Exception while adding session info in url %s' % self.currenturi))
示例12: __addPost
def __addPost(self, post, is_question=False):
    """Take a post tag, fetch its data and meta data and add it to self.pages.

    The session key is the text of the div whose id matches 'msgId<digits>',
    with its first and last character and the 'Msg Id: ' label removed.

    Returns:
        False when checkSessionInfo gives a truthy answer for the post or
        when updateSessionInfo reports nothing updated.  True when the post
        has no data, when the page was stored, or when an error occurred
        (the error is logged).
    """
    try:
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        id_tag = post.find('div', id=re.compile(r'msgId\d+'))
        unique_key = stripHtml(id_tag.renderContents())[1:-1].replace('Msg Id: ', '')
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[self.task.instance_data['uri']]):
            log.info(self.log_msg('Session info returns True for uri %s' % unique_key))
            return False
        page = self.__getData(post, is_question)
        if not page:
            log.info(self.log_msg('page contains empty data, getdata returns False for uri %s'
                                  % self.currenturi))
            return True
        result = updateSessionInfo(self.__genre, self.session_info_out,
                                   unique_key, get_hash(page), 'forum',
                                   self.task.instance_data.get('update'),
                                   parent_list=[self.task.instance_data['uri']])
        if result['updated']:
            page['parent_path'] = [self.task.instance_data['uri']]
            page['path'] = [self.task.instance_data['uri'], unique_key]
            # page['uri'] is not set here; it must already be present in
            # the dict returned by __getData.
            page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
            page.update(self.__task_elements_dict)
            self.pages.append(page)
            log.info(self.log_msg('Page added'))
        else:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            # NOTE(review): indentation was lost in this listing; this
            # return is assumed to belong to the else branch - confirm
            # against the original connector.
            return False
    except Exception:
        # Best effort, but KeyboardInterrupt and SystemExit now propagate.
        log.exception(self.log_msg('Cannot add the post for the uri %s' % self.currenturi))
    return True
示例13: __addPost
def __addPost(self, post, is_question = False):
    """Turn a post tag into a page dict and append it to self.pages.

    The href of the post's 'postcounter' anchor serves as session key, as
    the single path entry and as the page uri.

    Returns False only when checkSessionInfo gives a truthy answer; an
    empty page, a stored page, a refused session update and a logged error
    all return True.
    """
    try:
        counter_link = post.find('a', 'postcounter')
        unique_key = counter_link['href']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update')):
            log.info(self.log_msg('Session info returns True for uri %s' % unique_key))
            return False
        page = self.__getData(post, is_question, unique_key)
        if not page:
            log.info(self.log_msg('page contains empty data, getdata returns False for uri %s'
                                  % self.currenturi))
            return True
        outcome = updateSessionInfo(self.__genre, self.session_info_out,
                                    unique_key, get_hash(page), 'forum',
                                    self.task.instance_data.get('update'))
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            return True
        page.update({
            'parent_path': [],
            'path': [unique_key],
            'uri': unique_key,
            'uri_domain': urlparse.urlparse(unique_key)[1],
        })
        page.update(self.__task_elements_dict)
        self.pages.append(page)
    except:
        log.exception(self.log_msg('Cannot add the post for the uri %s' % self.currenturi))
    return True
示例14: __addPost
def __addPost(self, post, is_question=False):
    """Turn a post tag into a page dict and append it to self.pages.

    The session key is the name of the anchor inside the post's span with
    class 'name'.

    Returns False only when checkSessionInfo gives a truthy answer; an
    empty page, a stored page, a refused session update and a logged error
    all return True.
    """
    try:
        name_span = post.find('span', attrs={'class': 'name'})
        unique_key = name_span.find('a')['name']
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[self.task.instance_data['uri']]):
            log.info(self.log_msg('Session info returns True for %s' % unique_key))
            return False
        page = self.__getData(post, is_question)
        log.info(self.log_msg('page'))
        if not page:
            log.info(self.log_msg('page contains empty data __getData returns False for uri %s'
                                  % self.currenturi))
            return True
        outcome = updateSessionInfo(self.__genre, self.session_info_out,
                                    unique_key, get_hash(page), 'forum',
                                    self.task.instance_data.get('update'),
                                    parent_list=[self.task.instance_data['uri']])
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            return True
        thread_uri = self.task.instance_data['uri']
        page['parent_path'] = [thread_uri]
        page['path'] = [thread_uri, unique_key]
        page['uri'] = self.currenturi
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        page.update(self.__task_elements_dict)
        self.pages.append(page)
    except:
        log.exception(self.log_msg('Cannot add the post for the uri %s'
                                   % self.currenturi))
    return True
示例15: __addPost
def __addPost(self, post, is_question=False):
    """Turn a post tag into a page dict and append it to self.pages.

    The session key is the name of the first anchor in the post that has
    a 'name' attribute.

    Returns False only when checkSessionInfo gives a truthy answer; an
    empty page, a stored page, a refused session update and a logged error
    all return True.
    """
    try:
        unique_key = post.find('a', attrs={'name': True})['name']
        # The permalink is only used in the log message below.
        permalink = self.currenturi + '#' + unique_key
        if checkSessionInfo(self.__genre, self.session_info_out, unique_key,
                            self.task.instance_data.get('update'),
                            parent_list=[self.task.instance_data['uri']]):
            log.info(self.log_msg('Session info returns True for uri %s' % permalink))
            return False
        page = self.__getData(post, is_question, unique_key)
        if not page:
            log.info(self.log_msg('page contains empty data, getdata returns False for uri %s'
                                  % self.currenturi))
            return True
        outcome = updateSessionInfo(self.__genre, self.session_info_out,
                                    unique_key, get_hash(page), 'forum',
                                    self.task.instance_data.get('update'),
                                    parent_list=[self.task.instance_data['uri']])
        if not outcome['updated']:
            log.info(self.log_msg('Update session info returns False for url %s'
                                  % self.currenturi))
            return True
        thread_uri = self.task.instance_data['uri']
        page['parent_path'] = [thread_uri]
        page['path'] = [thread_uri, unique_key]
        # page['uri'] is not set here; it must already be present in the
        # dict returned by __getData.
        page['uri_domain'] = urlparse.urlparse(page['uri'])[1]
        page.update(self.__task_elements_dict)
        self.pages.append(page)
    except:
        log.exception(self.log_msg('Cannot add the post for the uri %s' % self.currenturi))
    return True