本文整理汇总了 Python 中 ms_spider_fw.DBSerivce.DBService.data2DB 方法的典型用法代码示例。如果您正苦于以下问题：Python DBService.data2DB 方法的具体用法？Python DBService.data2DB 怎么用？Python DBService.data2DB 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ms_spider_fw.DBSerivce.DBService 的用法示例。
在下文中一共展示了DBService.data2DB方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: spiderMain
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def spiderMain():
    """Main program: poll the download queue, parse each page, store one row.

    Creates a ``Dler`` and calls ``downLoad(100)`` on it, creates the
    ``thirdPartShopInfo`` table in ``jddata``, then loops forever reading
    ``(url, src)`` pairs from ``DBN.queueForDownLoad``.  Each ``src`` is
    parsed with ``PPer(src).pageParser()`` and written as ``[url] + parsed``.

    :return: does not return; the polling loop has no exit condition.
    """
    columns = [
        'productHref', 'companyName', 'shopName', 'shopHref', 'scoreSum',
        'scoreProduct', 'scoreProductAvg', 'scoreService', 'scoreServiceAvg',
        'scoreExpress', 'scoreExpressAvg', 'gradeHref',
    ]
    downloader = Dler()
    downloader.downLoad(100)
    store = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    store.createTable(tableTitle=columns)
    while True:
        # Re-read the attribute on every pass, exactly as the loop always did.
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing downloaded yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        parsed = PPer(src).pageParser()
        print(parsed[0])
        # The page URL becomes the first column ('productHref') of the row.
        store.data2DB(data=[url] + parsed)
示例2: push2DB
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def push2DB():
    """Write the rows returned by ``getKeyword()`` into the ``keyword`` table.

    The table is created with the columns ``categoryFi``, ``categorySe`` and
    ``categoryTi`` before the rows are inserted.
    """
    from ms_spider_fw.DBSerivce import DBService

    rows = getKeyword()
    # Positional arguments are presumably (dbName, tableName) -- confirm
    # against the DBService constructor.
    keyword_db = DBService('taobaodata', 'keyword')
    keyword_db.createTable(tableTitle=['categoryFi', 'categorySe', 'categoryTi'])
    keyword_db.data2DB(data=rows)
示例3: run
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def run(thread_count=1000):
    """Call ``run_test`` and store everything left on ``qu_proxy_ok``.

    :param thread_count: forwarded unchanged to ``run_test``.
    """
    run_test(thread_count)
    proxy_db = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    proxy_db.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    # Visual separator, then the number of queued results.
    print('#' * 100)
    print(qu_proxy_ok.qsize())
    collected = []
    # Drain the queue into a list so it can be written in a single call.
    while qu_proxy_ok.qsize():
        collected.append(qu_proxy_ok.get())
    proxy_db.data2DB(data=collected)
示例4: run
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def run(thread_count=20000):
    """Call ``muti_thread_test`` and store each queued result with a timestamp.

    Every item taken from ``qu_proxy_ok`` is paired with the local time at
    which it was dequeued and written to the ``proxy_ok`` table.

    :param thread_count: forwarded unchanged to ``muti_thread_test``.
    """
    muti_thread_test(thread_count)
    proxy_db = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    proxy_db.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    stamped = []
    while qu_proxy_ok.qsize():
        proxy = qu_proxy_ok.get()
        dequeued_at = time.strftime('%Y-%m-%d %X', time.localtime())
        stamped.append([proxy, dequeued_at])
    proxy_db.data2DB(data=stamped)
示例5: dataGen
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def dataGen():
    """Join company rows with product values and store them in a new table.

    For every key present in both ``companyInfo()`` and ``productInfo()``,
    the company row (a list) is extended with the product value.  The
    resulting rows are written to ``thirdPartShopInfoAddtest``, whose columns
    are those of ``thirdPartShopInfo`` plus one extra column.
    """
    comDict = companyInfo()
    proDict = productInfo()

    # Keep an intermediate mapping (rather than building the list directly)
    # so rows come out in the same order as before.
    merged = {}
    for key, company_row in comDict.items():
        # Direct membership test: no temporary key list, no linear scan.
        if key in proDict:
            merged[key] = company_row + [proDict[key]]
    data = list(merged.values())

    source_db = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    # NOTE: 'commnetCount' is spelled this way in the stored schema; it is
    # kept verbatim because renaming it would change the created column.
    title = source_db.getTableTitle() + ['commnetCount']
    print(title)
    target_db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddtest')
    target_db.createTable(tableTitle=title)
    target_db.data2DB(data=data)
示例6: spiderMain
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def spiderMain():
    """Main program: poll the download queue and store the parsed rows.

    Creates a ``Dler`` and calls ``downLoad(10)`` on it, creates the
    ``alibaba_cow_powder_3`` table in ``alibaba``, then loops forever reading
    ``(url, src)`` pairs from ``DBN.queueForDownLoad``.  A truthy parse
    result is stored with the page URL appended to every row; a falsy one
    is reported as a failure.

    :return: does not return; the polling loop has no exit condition.
    """
    columns = [
        'company_name',
        'keyword',
        'sale',
        'href',
        'member_id',
        'offer_id',
        'cxt_year',
        'credit_detail_href',
        'goods_from',
        'product_title_sample',
        'product_detail_sample',
        'location',
        'url_base',
    ]
    downloader = Dler()
    downloader.downLoad(10)
    store = DBService(dbName='alibaba', tableName='alibaba_cow_powder_3')
    store.createTable(tableTitle=columns)
    while True:
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing downloaded yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        rows = PPer(src).pageParser()
        if not rows:
            print(u'--失败:%s' % url)
            continue
        # The source URL fills the trailing 'url_base' column of each row.
        store.data2DB(data=[row + [url] for row in rows])
        print(u'++成功:%s' % url)
示例7: get_parser
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def get_parser(url, driver):
    """Load ``url`` in ``driver``, read the contact details, store one row.

    The stored row is ``[name, sex, job, cell phone, telephone, fax,
    address, timestamp, url]``; any field that cannot be read is stored as
    ``"-"``.  The row is written to ``alibaba.alibaba_cow_powder_phone``.

    :param url: address passed to ``driver.get``; also the last column.
    :param driver: object providing ``get``, ``title``,
        ``find_element_by_css_selector`` and ``find_elements_by_css_selector``
        (presumably a Selenium WebDriver -- confirm against the caller).
    """
    import random

    # Sleep a random, non-negative number of seconds before loading the page.
    time.sleep(abs(random.gauss(5, 5)))
    driver.get(url)
    print(driver.title)

    contacts_name = contacts_sex = contacts_job = "-"
    try:
        contacts_name = driver.find_element_by_css_selector(".contact-info .membername").text
        # Sex and job title are both cut out of the same <dd> text, so the
        # element is looked up once.
        detail = driver.find_element_by_css_selector(".contact-info>dl>dd").text
        contacts_sex = detail.split(" ")[1]
        contacts_job = detail.split("(")[1].split(")")[0]
    except Exception:
        # Best effort: fields that could not be read keep the "-" placeholder.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so the crawl can still be stopped.
        pass

    # Maps the label text of a contact row to the field it fills.
    label_to_field = {
        u"移动电话:": "cell_phone",
        u"电 话:": "tel_phone",
        u"传 真:": "fax_phone",
        u"地 址:": "shop_addr",
    }
    contact = dict.fromkeys(label_to_field.values(), "-")
    row_count = len(driver.find_elements_by_css_selector(".contcat-desc dl"))
    # CSS :nth-child positions are 1-based.
    for position in range(1, row_count + 1):
        row_selector = ".contcat-desc dl:nth-child(" + str(position) + ")"
        label = driver.find_element_by_css_selector(row_selector + " dt").text.strip()
        field = label_to_field.get(label)
        if field is not None:
            contact[field] = driver.find_element_by_css_selector(row_selector + " dd").text

    spider_time = time.strftime("%Y-%m-%d %X", time.localtime())
    result = [
        contacts_name,
        contacts_sex,
        contacts_job,
        contact["cell_phone"],
        contact["tel_phone"],
        contact["fax_phone"],
        contact["shop_addr"],
        spider_time,
        url,
    ]
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_phone")
    DB.data2DB(data=result)
示例8: getCategoryAndStartUrl
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def getCategoryAndStartUrl():
    """Fetch the category tree, store it, and queue the innermost entries.

    Downloads the JSONP category payload, strips the callback wrapper,
    decodes it from GBK and walks four nested levels of ``'s'`` children.
    For every innermost entry a six-item row is built from the ``'n'``
    titles of the second, third and fourth levels, and the fourth-level
    pair is put on the global ``queue_for_url_targetBase``.

    :return: the list of six-item rows that was written to ``jdkeyword``.
    """
    import json

    global queue_for_url_targetBase
    queue_for_url_targetBase = Queue(0)

    def swapped_pair(title):
        # Titles are '|'-separated; emit the second field, then the first.
        fields = title.split('|')
        return [fields[1], fields[0]]

    raw = myUrlOpen.requestByProxy('http://dc.3.cn/category/get?callback=getCategoryCallback')
    # Drop everything up to the first '(' and the final character (the
    # JSONP wrapper), then decode the remaining bytes.
    payload = raw.split('(', 1)[1][:-1].decode('gbk', 'ignore')

    category = []
    for first_level in json.loads(payload)['data']:
        for second_level in first_level['s']:
            second_title = second_level['n']
            for third_level in second_level['s']:
                third_title = third_level['n']
                for fourth_level in third_level['s']:
                    fourth_title = fourth_level['n']
                    category.append(
                        swapped_pair(second_title)
                        + swapped_pair(third_title)
                        + swapped_pair(fourth_title)
                    )
                    queue_for_url_targetBase.put(tuple(swapped_pair(fourth_title)))

    db = DBService(dbName='jddata', tableName='jdkeyword')
    db.createTable(tableTitle=[
        'category_fi_name', 'category_fi',
        'category_se_name', 'category_se',
        'category_ti_name', 'category_ti',
    ])
    db.data2DB(data=category)
    return category
示例9: craweldhref
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
return judgePageHref
def craweldhref():
    """Return the stored ``href`` values, each without one trailing ``'/'``.

    Reads the ``href`` column of ``yms_tmall_shopinfo_com_withoutjudge`` and
    prints how many values were read.

    :return: list of href strings; an empty string is returned unchanged.
    """
    db = DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
    rows = db.getData(var='href')
    # endswith() is safe on an empty string, where indexing [-1] would raise
    # IndexError.  A real list (not a lazy map object) keeps len() working.
    hrefs = [
        row[0][:-1] if row[0].endswith('/') else row[0]
        for row in rows
    ]
    print(len(hrefs))
    return hrefs
def href():
    """Build rows for the entries whose href is not stored yet.

    Takes the items from ``commentHrefList()`` and keeps those whose second
    element is absent from ``craweldhref()``.

    :return: list of three-item lists: the item's first element, its second
        element with ``'/'`` appended, and a rate-page URL built from its
        third element.
    """
    candidates = commentHrefList()
    # A set makes the per-item membership test constant time.
    already_stored = set(craweldhref())
    return [
        [
            entry[0],
            entry[1] + '/',
            'http://rate.taobao.com/user-rate-' + entry[2] + '.htm',
        ]
        for entry in candidates
        if entry[1] not in already_stored
    ]
# Module-level script step: build the rows with href() and insert them into
# the same table that craweldhref() reads from.
temp = href()
db=DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
# tableTitle lists three column names, one per element of each row.
db.data2DB(data=temp,tableTitle=['name','href','judgepage_href'])
示例10: putDataIntoDB
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
def putDataIntoDB(path):
    """Insert the rows returned by ``getData`` into the weekly Tmall table.

    :param path: forwarded to ``getData`` as its ``path`` keyword argument.
    """
    rows = getData(path=path)
    weekly_db = DBService(
        dbName='elec_platform',
        tableName='tmall_baseinfo_weekly_2016',
    )
    weekly_db.data2DB(data=rows)
示例11: map
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
map(lambda x: end_day - timedelta(days=x), range(1, days + 1)))
_start_time.reverse()
_end_time.reverse()
time_step = zip(_start_time, _end_time)
t_k = target_keyword(industry)
for item in time_step:
start, end = item
print start
for k_w in t_k:
try:
print k_w[1]
api = weibo_api(start_time=start, end_time=end, key_word=k_w[0] + ' ' + industry)
response = requests.get(url=api, headers=headers)
db_server.data2DB(data=[add_info(response.content, industry, k_w[1]),
time.strftime('%Y-%m-%d %X', time.localtime())])
page_total = json.loads(response.content).get('total_number')
if not page_total:
continue
for i in range(2, 101 if page_total / 10 > 101 else page_total / 10):
try:
api_t = weibo_api(start_time=start, end_time=end, page=i, key_word=k_w[0] + ' ' + industry)
response_t = requests.get(url=api_t, headers=headers)
db_server.data2DB(data=[add_info(response_t.content, industry, k_w[1]),
time.strftime('%Y-%m-%d %X', time.localtime())])
print 'is the ' + str(i) + ' request sucessful.'
except Exception, e:
print e.message
continue
示例12:
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import data2DB [as 别名]
it.get("userProvince"),
it.get("userRegisterTime"),
it.get("viewCount"),
it.get("orderId"),
it.get("isReplyGrade"),
it.get("nickname"),
it.get("userClient"),
it.get("productColor"),
it.get("productSize"),
it.get("integral"),
it.get("anonymousFlag"),
it.get("userLevelName"),
it.get("recommend"),
re.sub(re_sub_p, '', it.get("userClientShow")),
it.get("isMobile"),
st[2],
st[3],
st[0],
st[1],
it.get("days"),
u'手机']
res.append(t)
try:
db_server.data2DB(data=t)
except Exception,e:
print e.message
if __name__ == "__main__":
    # Script entry point: run extract_info over every element of ``data``.
    for item in data:
        extract_info(item)