本文整理汇总了Python中src.script.db.handle_db函数的典型用法代码示例。如果您正苦于以下问题:Python handle_db函数的具体用法?Python handle_db怎么用?Python handle_db使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了handle_db函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_apk
def get_apk(self, response):
    """Save the downloaded package to disk and record it in the database.

    Writes ``response.body`` to a new ``.apk`` file inside the directory
    returned by ``utils.make_spiderdir``, inserts a software-info row and a
    topic-url row through ``handle_db``, then hands the file to
    ``autocopy.copy``.

    :param response: response whose body is the package; its request meta
        must contain ``referer_url`` (used both as topic and referer URL).
    :return: ``None``.
    """
    topic_url = referer_url = response.request.meta['referer_url']
    is_crawled = '1'
    priority_rating = '0'
    filename = ''.join([str(random.randrange(1, 100000)), '.apk'])
    # Call the helper that creates the download folder.
    down_dir = utils.make_spiderdir(self.platform, 'download')
    # Decode: some names may be utf-8 encoded; turn them into unicode.
    try:
        filename = filename.decode('utf-8', 'ignore')
    except (AttributeError, UnicodeError):
        # No ``decode`` available or undecodable: keep the name unchanged.
        pass
    filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
    # Context manager so the handle is closed even if the write fails.
    with open(filename, 'wb') as apk_file:
        apk_file.write(response.body)
    # Store in the database only after the download, to keep it accurate.
    hashurl = sql.hash_topic(topic_url)
    updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): backslashes are doubled, presumably so a Windows path
    # survives being embedded in the SQL text -- confirm.
    filename = filename.replace('\\', '\\\\')
    insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
    status = handle_db(insert_sql)
    log.msg(str(status), log.DEBUG)
    # Update the topic table.
    insert_sql = sql.insert_topic_url(self.platform, topic_url, referer_url, updatedate, is_crawled, priority_rating)
    status = handle_db(insert_sql)
    # Log the status of THIS statement (the original logged the stale one).
    log.msg(str(status), log.DEBUG)
    try:
        autocopy.copy(filename, self.platform)
        log.msg('copy job is successed', log.INFO)
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.ERROR)
        log.msg('copy job is failture', log.ERROR)
示例2: reply_status
def reply_status(self, response):
    """Check whether the reply was posted and credit the account if so.

    Searches ``response.body`` for the GBK-encoded success message and the
    hourly-limit message.  When the limit message is present nothing is
    done.  On success the account row is reloaded, ``self.reply_num`` and
    ``self.point_num`` are each incremented by one and written back through
    ``handle_db``.

    :param response: response received after submitting the reply.
    :return: ``None`` in every case.
    """
    # Going back to the index page is mostly useful for boards that require
    # a reply; for threads that do not, it only costs performance.
    # Look up the reply status.
    success = u'非常感谢,你的回复已经发布'.encode('gbk', 'ignore')
    status = re.findall(success, response.body)
    username_sql = sql.select_accountbyusername(self.username, self.platform)
    # Reply-count limit.
    failture = u'对不起,您所在的用户组每小时限制发帖 '.encode('gbk', 'ignore')
    failture_status = re.findall(failture, response.body, re.I)
    if failture_status:
        return
    if not status:
        return
    log.msg('reply success', log.INFO)
    reply_nums = handle_db(username_sql)['data'][0]
    # NOTE(review): column 5 is treated as the point count and the
    # second-to-last column as the reply count -- confirm against the schema.
    self.point_num = reply_nums[5]
    self.reply_num = reply_nums[-2]
    self.reply_num += 1
    self.point_num += 1
    # Reply succeeded: account reply_num + 1, points + 1.
    try:
        update_replynum = sql.update_account_username(self.username, self.platform, self.reply_num, self.point_num)
        handle_db(update_replynum)
        # Report the point total; the original printed reply_num here.
        log.msg(('<username: %s \'s integral is : +1 ,now integral is %s>' % (self.username, self.point_num)), log.INFO)
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.ERROR)
示例3: get_apk
def get_apk(self, response):
    """Write the downloaded package to a file and record it in the database.

    The file name is built from a random number, the ``filename`` found in
    the request meta and the current Unix time, and placed in the directory
    returned by ``utils.make_spiderdir``.  Afterwards a software-info row is
    inserted, the topic URL is updated via ``sql.topicurl_withcrawed`` and
    the file is passed to ``autocopy.copy``.

    :param response: response whose body is the package; its request meta
        must contain ``filename`` and ``topic_url``.
    :return: ``None``.
    """
    filename = response.request.meta['filename']
    filename = ''.join([str(random.randrange(1, 100000)), '.', filename])
    # Call the helper that creates the download folder.
    down_dir = utils.make_spiderdir(self.name, 'download')
    # Decode: some names may be utf-8 encoded; turn them into unicode.
    try:
        filename = filename.decode('utf-8', 'ignore')
    except (AttributeError, UnicodeError):
        # No ``decode`` available or undecodable: keep the name unchanged.
        pass
    filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
    # Context manager so the handle is closed even if the write fails.
    with open(filename, 'wb') as package_file:
        package_file.write(response.body)
    # Store in the database only after the download, to keep it accurate.
    topic_url = response.request.meta['topic_url']
    hashurl = sql.hash_topic(topic_url)
    updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): backslashes are doubled, presumably so a Windows path
    # survives being embedded in the SQL text -- confirm.
    filename = filename.replace('\\', '\\\\')
    insert_sql = sql.insert_softwareinfo(self.name, hashurl, topic_url, updatedate, filename)
    status = db.handle_db(insert_sql)
    log.msg(str(status), log.DEBUG)
    # Update the topic table.
    update_topic_url = sql.topicurl_withcrawed(topic_url)
    status = db.handle_db(update_topic_url)
    log.msg(str(status), log.DEBUG)
    # Backup directory.
    try:
        autocopy.copy(filename, self.name)
        log.msg('copy job is successed', log.INFO)
    except Exception as exc:
        # Keep the best-effort behaviour but record why the copy failed.
        log.msg(str(exc), log.ERROR)
        log.msg('copy job is failture', log.ERROR)
示例4: parse_item
def parse_item(self, response):
    """Find the package links on a topic page and yield download requests.

    Generator.  If the page says the topic was removed, the topic URL is
    updated via ``sql.topicurl_withcrawed`` and nothing is yielded.  If no
    ``.apk`` link is found, either the topic priority is set to ``'-1'``
    (page says a reply is required) or the topic URL is updated and the
    request from ``self.get_down_topic`` is yielded.  Otherwise one request
    per distinct link is yielded with ``self.get_downloadpath`` as callback.

    :param response: topic page response; request meta carries ``topic_url``.
    """
    # Parse out the packages that need downloading.
    log.msg(response.url, log.INFO)
    # Bound up front so later branches never hit an unbound local.
    topic_url = None
    try:
        topic_url = response.request.meta['topic_url']
        if re.findall(u'指定的主题不存在或已被删除或正在被审核,请返回'.encode('gbk', 'ignore'), response.body, re.I):
            # Run the topic update.
            log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            return
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.INFO)
    try:
        urls = re.findall(r'<a href="(.*?)".*\.apk</a>', response.body, re.I)
        if not urls:
            # Data that can only be downloaded after replying.
            reply_urls = re.findall(u'如果你要查看本帖隐藏内容请'.encode('gbk', 'ignore'), response.body, re.I)
            # If the reply-required text matches, set the crawl priority to
            # -1; the combined reply+download module looks for priority -1.
            print('reply_urls')
            print(reply_urls)
            if reply_urls:
                update_topic_priority_sql = sql.update_topic_priority(self.platform, topic_url, '-1')
                n = handle_db(update_topic_priority_sql)
                log.msg(str(n), log.INFO)
                return
            log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
            # Without an apk file the topic link is useless: is_crawled=1.
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            # After a successful reply without a download link this login
            # session must not stop; carry on.
            self.topic_reply_num -= 1
            log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
            log.msg(str(response), log.DEBUG)
            # The original passed ``url`` here, which is never bound in this
            # branch; the topic URL is what the sibling reply_status passes.
            request = self.get_down_topic(response, topic_url)
            yield request
        else:
            for url in set(urls):
                url = url.replace('amp;', '')
                print('url: %s' % url)
                request = response.request.replace(url=url, callback=self.get_downloadpath)
                request.meta['url'] = response.url
                yield request
    except IndexError:
        log.msg(traceback.format_exc(), log.ERROR)
        # No apk package here: set this link's crawl status to 1.
        update_crawled_sql = sql.topicurl_withcrawed(response.url)
        # Was ``handler_db`` (undefined name) in the original.
        status = handle_db(update_crawled_sql)
        if status['errorNo'] == 1:
            log.msg('this url has no apk', log.INFO)
        return
示例5: parse_item
def parse_item(self, response):
    """Find the package links on a topic page and yield download requests.

    Generator.  If the page says the topic was removed, the topic URL is
    updated via ``sql.topicurl_withcrawed`` and nothing is yielded.  If no
    matching link is found, the topic priority is set to ``'-1'`` when the
    page says a reply is required, otherwise the topic URL is updated.
    When links are found, one request per distinct link is yielded with
    ``self.get_apk`` as callback.

    :param response: topic page response; request meta carries ``topic_url``.
    """
    # Parse out the packages that need downloading.
    log.msg(response.url, log.INFO)
    topic_url = None
    try:
        # Read first: the original used topic_url before assigning it, so
        # the removed-topic branch always failed with an unbound local.
        topic_url = response.request.meta['topic_url']
        if re.findall(u'抱歉,指定的主题不存在或已被删除或正在被审核'.encode('gbk', 'ignore'), response.body, re.I):
            # Run the topic update.
            log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            return
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.INFO)
    try:
        # NOTE(review): '.' and '?' in "forum.php?" are unescaped regex
        # metacharacters; pattern kept as-is to preserve what it matches.
        urls = re.findall(r'<a href="(forum.php?[^"]+)" target="_blank">.*?\.apk</a>', response.body, re.I)
        if not urls:
            # Cannot download without replying first.
            noreply_regex = u'如果您要查看本帖隐藏内容请'.encode('gbk', 'ignore')
            noreply = re.findall(noreply_regex, response.body, re.I)
            for i in noreply:
                print(i.decode('gbk', 'ignore'))
            if noreply:
                # Update the topic with priority -1.
                update_topic_priority_sql = sql.update_topic_priority(self.platform, topic_url, '-1')
                n = handle_db(update_topic_priority_sql)
                log.msg(''.join(['hide_apk_update topic_url priority=-1', str(n)]), log.INFO)
                return
            log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
            # Without an apk file the topic link is useless: is_crawled=1.
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            return
        for url in set(urls):
            url = 'http://bbs.mumayi.com/%s' % url
            request = response.request.replace(url=url, callback=self.get_apk)
            request.meta['url'] = response.url
            yield request
    except IndexError:
        log.msg(traceback.format_exc(), log.ERROR)
        # No apk package here: set this link's crawl status to 1.
        update_crawled_sql = sql.topicurl_withcrawed(response.url)
        # Was ``handler_db`` (undefined name) in the original.
        status = handle_db(update_crawled_sql)
        if status['errorNo'] == 1:
            log.msg('this url has no apk', log.INFO)
        return
示例6: get_proxy
def get_proxy(spider_name, enable):
    """Return the first proxy value selected for ``spider_name``.

    Runs the statement from ``sql.get_proxy_url`` and returns the first
    column of the first row.  When that lookup yields nothing, the statement
    from ``sql.update_proxy_enable`` is executed and the lookup is retried.

    NOTE(review): the retry is unbounded; if the update never produces a
    selectable row this recurses until the interpreter's recursion limit.

    :param spider_name: spider whose proxy is requested.
    :param enable: value forwarded to ``sql.get_proxy_url``.
    :return: the value at ``['data'][0][0]`` of the query result.
    """
    try:
        proxy_sql = sql.get_proxy_url(spider_name, enable)
        n = handle_db(proxy_sql)['data'][0][0]
        print(n)
    except (KeyError, IndexError, TypeError):
        # Every proxy has been handed out: with no rows the lookup raises.
        # Reset the proxies (per the original author: make them all usable
        # again), then call this function again.
        update_enable_sql = sql.update_proxy_enable(spider_name)
        print(handle_db(update_enable_sql))
        # Return the retried lookup; the original discarded it and returned
        # the status of the update statement instead of a proxy.
        return get_proxy(spider_name, enable)
    return n
示例7: get_integral_page
def get_integral_page(self, response):
    """Read the user's coin balance from the page and decide how to proceed.

    The balance is extracted with a regex from ``response.body`` and stored
    through ``sql.update_account_point``.  If it is greater than
    ``settings.INTEGERAL`` the request from ``self.get_topic`` is returned;
    otherwise ``None``.  If the balance is empty or anything raises, the
    request from ``self.get_topic`` is returned as well.

    :param response: page that contains the coin balance.
    :return: a request from ``self.get_topic(response)`` or ``None``.
    """
    # Match the balance with a regular expression.
    try:
        print('get_integral_page')
        integral = re.findall(u'金币: <a href=".*?">(.*?)</a>'.encode('gbk', 'ignore'), response.body, re.I)[0].replace(' ', '')
        print('integral %s' % integral)
        if not integral:
            log.msg('cann\'t get user\'s integral', log.ERROR)
            return self.get_topic(response)
        # Balance found: store it, then continue only if it is above the
        # threshold.
        update_user_integral_sql = sql.update_account_point(self.username, self.platform, integral)
        n = handle_db(update_user_integral_sql)
        log.msg(('update user(%s)\'s integral is: %s, %s' % (self.username, integral, n)), log.INFO)
        # Minimum balance required for downloading; configurable.
        if int(integral) > settings.INTEGERAL:
            return self.get_topic(response)
        print('return None')
        return None
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.ERROR)
        request = self.get_topic(response)
        print('except')
        return request
示例8: parse_item
def parse_item(self, response):
    """Find apk/zip/rar links on a topic page and yield attachment requests.

    Generator.  When no link is found the topic URL is updated via
    ``sql.topicurl_withcrawed``.  For every distinct link a request to
    ``http://bbs.gfan.com/<link>`` is yielded with
    ``self.get_attachementpath`` as callback.

    :param response: topic page response; request meta carries ``topic_url``.
    """
    # Parse out the packages that need downloading.
    topic_url = response.request.meta['topic_url']
    try:
        # ``(?:apk|zip|rar)`` is an alternation; the original ``[apk|zip|rar]``
        # was a character class matching any ONE of those letters or '|'.
        # Non-capturing so findall still returns only the href group.
        urls = re.findall(r'<a.*?href="(.*?)".*?>.*\.(?:apk|zip|rar).*?</a>', response.body, re.I)
        print(urls)
        if not urls:
            print('this url->%s has not apk file' % response.request.meta['topic_url'])
            # Without an apk file the topic link is useless: is_crawled=1.
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            status = handle_db(update_topic_url)
            print(status)
        for url in set(urls):
            url = 'http://bbs.gfan.com/%s' % url
            request = response.request.replace(url=url, callback=self.get_attachementpath)
            request.meta['url'] = response.url
            yield request
        return
    except IndexError:
        traceback.print_exc()
        # No apk package here: set this link's crawl status to 1.
        update_crawled_sql = sql.topicurl_withcrawed(response.url)
        # Was ``handler_db`` (undefined name) in the original.
        status = handle_db(update_crawled_sql)
        if status['errorNo'] == 1:
            print('this url has no apk')
        return
示例9: get_apk
def get_apk(self, response):
    """Save the package, record it, charge one point and continue replying.

    Writes ``response.body`` to a new ``.apk`` file, inserts a software-info
    row, updates the topic URL, decrements the account's point value by one
    through ``handle_db``, passes the file to ``autocopy.copy`` and finally
    returns whatever ``self.repeat_reply(response)`` returns.

    :param response: response whose body is the package; its request meta
        must contain ``topic_url``.
    :return: the result of ``self.repeat_reply(response)``.
    """
    filename = ''.join([str(random.randrange(1, 100000)), '.apk'])
    # Call the helper that creates the download folder.
    down_dir = utils.make_spiderdir(self.platform, 'download')
    # Decode: some names may be utf-8 encoded; turn them into unicode.
    try:
        filename = filename.decode('utf-8', 'ignore')
    except (AttributeError, UnicodeError):
        # No ``decode`` available or undecodable: keep the name unchanged.
        pass
    filename = ''.join([down_dir, os.sep, str(time.time()).split('.')[0], filename])
    # Context manager so the handle is closed even if the write fails.
    with open(filename, 'wb') as apk_file:
        apk_file.write(response.body)
    # Store in the database only after the download, to keep it accurate.
    topic_url = response.request.meta['topic_url']
    hashurl = sql.hash_topic(topic_url)
    updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): backslashes are doubled, presumably so a Windows path
    # survives being embedded in the SQL text -- confirm.
    filename = filename.replace('\\', '\\\\')
    insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
    status = handle_db(insert_sql)
    log.msg(str(status), log.DEBUG)
    # Update the topic table.
    update_topic_url = sql.topicurl_withcrawed(topic_url)
    status = handle_db(update_topic_url)
    log.msg(str(status), log.DEBUG)
    # Reaching this method means the response was downloaded: points - 1.
    account_sql = sql.select_accountbyusername(self.username, self.platform)
    # NOTE(review): column 5 is treated as the point count -- confirm.
    point_num = handle_db(account_sql)['data'][0][5]
    point_num -= 1
    # Then write the new value back.
    update_account_pointsql = sql.update_account_point(self.username, self.platform, point_num)
    n = handle_db(update_account_pointsql)
    if n['errorNo'] == 0:
        # Report the new point total; the original printed self.reply_num.
        log.msg(('<username: %s \'s integral is : -1 ,now integral is %s>' % (self.username, point_num)), log.INFO)
    try:
        autocopy.copy(filename, self.platform)
        log.msg('copy job is successed', log.INFO)
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.ERROR)
        log.msg('copy job is failture', log.ERROR)
    request_topic = self.repeat_reply(response)
    return request_topic
示例10: reply_status
def reply_status(self, response):
    """Check the reply result, credit the account and pick the next request.

    Searches ``response.body`` for the GBK-encoded success message and the
    hourly-limit message.  When the limit message is present ``None`` is
    returned.  On success the account's reply and point values are
    incremented and stored, and the request from ``self.get_down_topic`` is
    returned.  In every other case ``self.topic_reply_num`` is decremented
    and the result of ``self.repeat_reply`` is returned.

    NOTE(review): the source lost its indentation; the trailing fallback is
    placed at function level (reached when the reply did not succeed or the
    account update raised) -- confirm against the original file.

    :param response: response received after submitting the reply; its
        request meta must contain ``topic_url``.
    :return: a request, or ``None`` when the hourly limit was hit.
    """
    # Going back to the index page is mostly useful for boards that require
    # a reply; for threads that do not, it only costs performance.
    # Look up the reply status.
    success = u'非常感谢,你的回复已经发布'.encode('gbk', 'ignore')
    status = re.findall(success, response.body)
    username_sql = sql.select_accountbyusername(self.username, self.platform)
    print('status %s' % status)
    # Reply-count limit.
    failture = u'对不起,您所在的用户组每小时限制发帖 '.encode('gbk', 'ignore')
    failture_status = re.findall(failture, response.body, re.I)
    if failture_status:
        print(u'对不起,您所在的用户组每小时限制发帖 ')
        return
    if status:
        log.msg('reply success', log.INFO)
        reply_nums = handle_db(username_sql)['data'][0]
        # NOTE(review): column 5 is treated as the point count and the
        # second-to-last column as the reply count -- confirm.
        self.point_num = reply_nums[5]
        self.reply_num = reply_nums[-2]
        self.reply_num += 1
        self.point_num += 1
        # Reply succeeded: account reply_num + 1, points + 1.
        try:
            update_replynum = sql.update_account_username(self.username, self.platform, self.reply_num, self.point_num)
            handle_db(update_replynum)
            # Report the point total; the original printed reply_num here.
            log.msg(('<username: %s \'s integral is : +1 ,now integral is %s>' % (self.username, self.point_num)), log.INFO)
            # Reply succeeded: start the download.
            url = response.request.meta['topic_url']
            print(u'回复成功主题:url----> %s' % url)
            # Reply succeeded: go back into the topic page.
            self.topic_reply_num -= 1
            log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
            log.msg(str(response), log.DEBUG)
            request = self.get_down_topic(response, url)
            return request
        except Exception:
            # format_exc() returns the traceback text; print_exc() returns None.
            log.msg(traceback.format_exc(), log.ERROR)
    # Call get_topic again and decrement the remaining reply count; once it
    # drops below 1 nothing more is done (per the original author).
    self.topic_reply_num -= 1
    # Formatted string; the original handed log.msg a tuple here.
    log.msg(('reply success, will download software%s' % str(self.topic_reply_num)), log.DEBUG)
    log.msg(str(response), log.DEBUG)
    request_topic = self.repeat_reply(response)
    return request_topic
示例11: get_apk
def get_apk(self, response):
    """Save the downloaded package under ``gfan/download/<date>`` and record it.

    The target directory is ``<cwd>/gfan/download/<YYYY-MM-DD>`` and is
    created when missing.  The file name is the current Unix time followed
    by the ``filename`` from the request meta.  Afterwards a software-info
    row is inserted and the topic URL is updated through ``handle_db``.

    :param response: response whose body is the package; its request meta
        must contain ``filename`` and ``topic_url``.
    :return: ``None``.
    """
    filename = response.request.meta['filename']
    # Compute the dated directory once so the existence check, the creation
    # and the final path cannot disagree across a date change.
    day_dir = os.path.join(os.getcwd(), 'gfan', 'download', time.strftime('%Y-%m-%d'))
    if not os.path.isdir(day_dir):
        # makedirs also creates missing parents; mkdir failed without them.
        os.makedirs(day_dir)
    filename = ''.join([day_dir, os.sep, str(time.time()).split('.')[0], filename])
    # Context manager so the handle is closed even if the write fails.
    with open(filename, 'wb') as apk_file:
        apk_file.write(response.body)
    # Store in the database only after the download, to keep it accurate.
    topic_url = response.request.meta['topic_url']
    hashurl = sql.hash_topic(topic_url)
    updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
    # NOTE(review): backslashes are doubled, presumably so a Windows path
    # survives being embedded in the SQL text -- confirm.
    filename = filename.replace('\\', '\\\\')
    insert_sql = sql.insert_softwareinfo(self.platform, hashurl, topic_url, updatedate, filename)
    status = handle_db(insert_sql)
    print(status)
    # Update the topic table.
    update_topic_url = sql.topicurl_withcrawed(topic_url)
    status = handle_db(update_topic_url)
    print(status)
示例12: __init__
def __init__(self):
    """Load an account for ``self.platform`` and flag it as in use.

    Reads the user name and password from positions 2 and 3 of the record
    returned by ``get_account``, resets ``self.reply_num`` to 0, runs the
    statement from ``sql.update_use_byusernamesql`` with the value ``'1'``
    and connects ``self.user_relax`` to the ``spider_closed`` signal.
    """
    account = get_account(self.platform)
    self.username, self.password = account[2], account[3]
    self.reply_num = 0
    # Before the spider starts, set the account's in-use state to 1 so that
    # other threads are not allowed to use it.
    handle_db(sql.update_use_byusernamesql(self.username, self.platform, '1'))
    log.msg(('<username: %s > is being use' % self.username), log.DEBUG)
    # When this object receives the spider-closed signal, the method given
    # as first argument is called.
    dispatcher.connect(self.user_relax, signals.spider_closed)
示例13: process_item
def process_item(self, item, spider):
    """Store the topic URL carried by ``item`` and pass the item on.

    Builds a statement with ``insert_topic_url`` from the item's
    ``spider_name``, ``topic_url`` and ``referer_url``, the current local
    time and the values 0 / 0 for crawled flag and priority, executes it
    with ``handle_db`` and prints the result.

    :param item: mapping with ``topic_url``, ``referer_url`` and
        ``spider_name`` keys.
    :param spider: not used by this method.
    :return: the unchanged ``item``.
    """
    # Unpack the item that was handed over.
    topic_url, referer_url, spider_name = (
        item['topic_url'],
        item['referer_url'],
        item['spider_name'],
    )
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    statement = insert_topic_url(spider_name, topic_url, referer_url, stamp, 0, 0)
    result = handle_db(statement)
    print(result)
    return item
示例14: parse_item
def parse_item(self, response):
    """Find the package links on a topic page and yield download requests.

    Generator.  If the page says the topic was removed, the topic URL is
    updated via ``sql.topicurl_withcrawed`` and nothing is yielded.  If no
    matching link is found, the topic URL is updated and the result of
    ``self.repeat_reply`` is yielded.  Otherwise one GET request per
    distinct link is yielded with ``self.get_apk`` as callback.

    :param response: topic page response; request meta carries ``topic_url``.
    """
    # Parse out the packages that need downloading.
    log.msg(response.url, log.INFO)
    topic_url = None
    try:
        # Read first: the original used topic_url before assigning it, so
        # the removed-topic branch always failed with an unbound local.
        topic_url = response.request.meta['topic_url']
        if re.findall(u'抱歉,指定的主题不存在或已被删除或正在被审核'.encode('gbk', 'ignore'), response.body, re.I):
            # Run the topic update.
            log.msg(('this topic_url(%s) has be removed by admin' % response.url), log.INFO)
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            return
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.INFO)
    try:
        # NOTE(review): '.' and '?' in "forum.php?" are unescaped regex
        # metacharacters; pattern kept as-is to preserve what it matches.
        urls = re.findall(r'<a href="(forum.php?[^"]+)".*>.*?\.apk', response.body, re.I)
        if not urls:
            # Cannot download without replying first.
            log.msg(('this url->%s has not apk file' % response.request.meta['topic_url']), log.INFO)
            # Without an apk file the topic link is useless: is_crawled=1.
            update_topic_url = sql.topicurl_withcrawed(topic_url)
            handle_db(update_topic_url)
            request_topic = self.repeat_reply(response)
            yield request_topic
        else:
            for url in set(urls):
                url = 'http://bbs.mumayi.com/%s' % url
                url = url.replace('amp;', '')
                request = response.request.replace(url=url, method='get', callback=self.get_apk)
                request.meta['url'] = response.url
                yield request
    except IndexError:
        log.msg(traceback.format_exc(), log.ERROR)
        # No apk package here: set this link's crawl status to 1.
        update_crawled_sql = sql.topicurl_withcrawed(response.url)
        # Was ``handler_db`` (undefined name) in the original.
        status = handle_db(update_crawled_sql)
        if status['errorNo'] == 1:
            log.msg('this url has no apk', log.INFO)
        return
示例15: get_topic
def get_topic(self, response):
    """Build a GET request for a topic URL fetched from the database.

    Executes the statement from ``sql.get_topic_ranone(self.platform, '0')``
    and takes the first column of the first row as the target URL.  The
    returned request is a copy of ``response.request`` pointing at that URL,
    with ``self.get_code`` as callback and ``topic_url`` /
    ``topic_response`` stored in its meta.

    :param response: response kept in the request meta for later callbacks.
    :return: the prepared request.
    """
    # Per the original author: pick, by time, a topic that has not been
    # downloaded yet, to reply to it.
    query = sql.get_topic_ranone(self.platform, '0')
    row = handle_db(query)['data'][0]
    target = row[0]
    request = response.request.replace(url=target, method='get')
    request.callback = self.get_code
    request.meta['topic_url'] = target
    # Keep the post-login response so it can be passed back in when this
    # method is called again later.
    request.meta['topic_response'] = response
    return request