This article collects typical usage examples of the Python class ms_spider_fw.DBSerivce.DBService. If you are unsure what the DBService class does, how to use it, or want to see it in context, the curated examples below should help.
Fifteen DBService code examples are shown, sorted by popularity by default.
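Before the examples, here is a minimal sketch of the DBService pattern that recurs below: construct with a database and table name, create the table, insert rows, then query columns. The database name, table name, and column names here are hypothetical, and any connection parameters beyond those shown in the examples are assumptions:

# A minimal usage sketch inferred from the examples below;
# 'demo_db', 'demo_table', and the column names are hypothetical.
from ms_spider_fw.DBSerivce import DBService

db = DBService(dbName='demo_db', tableName='demo_table')
db.createTable(tableTitle=['shop_name', 'shop_href'])    # create a table with these columns
db.data2DB(data=[['some shop', 'http://example.com']])   # insert a batch of rows
rows = db.getData(var='shop_href', distinct=True)        # read back selected columns
hrefs = [row[0] for row in rows]                         # getData returns row tuples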
Example 1: gen_url
def gen_url():
    def url_join(t):
        # Skip hrefs that already contain .html
        if '.html' in t:
            return None
        temp = t.rsplit('/', 1)
        return temp[0] + '/contactinfo/' + temp[1] + '.html'

    def change_par(x):
        # Normalize localized subdomains (pt/ru/es) to www before joining
        if '//www' in x:
            return url_join(x)
        elif '//pt' in x:
            return url_join(x.replace('//pt', '//www'))
        elif '//ru' in x:
            return url_join(x.replace('//ru', '//www'))
        elif '//es' in x:
            return url_join(x.replace('//es', '//www'))
        else:
            return None

    db_g = DBService(dbName=db_name, tableName='aliexpress_temp', **connect_dict)
    href_list_t = db_g.getData(var='store_href', distinct=True)
    href_s = map(change_par, (x[0] for x in href_list_t))
    # Drop None entries and deduplicate
    return list(set(filter(None, href_s)))
Example 2: push2DB
def push2DB():
    from ms_spider_fw.DBSerivce import DBService
    data = getKeyword()
    db = DBService('taobaodata', 'keyword')
    tableTitle = ['categoryFi', 'categorySe', 'categoryTi']
    db.createTable(tableTitle=tableTitle)
    db.data2DB(data=data)
Example 3: spiderMain
def spiderMain():
    """
    Main entry point: download pages, parse them, and write results to the DB.
    :return:
    """
    dler = Dler()
    dler.downLoad(100)
    DB = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    DB.createTable(
        tableTitle=['productHref', 'companyName', 'shopName', 'shopHref', 'scoreSum', 'scoreProduct',
                    'scoreProductAvg', 'scoreService', 'scoreServiceAvg', 'scoreExpress', 'scoreExpressAvg',
                    'gradeHref'])
    while True:
        que = DBN.queueForDownLoad
        if not que.empty():
            url, src = que.get()
            pPer = PPer(src)
            temp = pPer.pageParser()
            # proxy_test = temp[0]
            # if proxy_test == '-':
            #     continue
            # else:
            #     print(proxy_test)
            print(temp[0])
            DB.data2DB(data=[url] + temp)
        else:
            time.sleep(1)
Example 4: productInfo
def productInfo():
    # Map productHref -> commentCount for up to 200000 rows
    db = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    data = db.getData(var='productHref,commentCount', limit=200000)
    proDict = {}
    for item in data:
        proDict[item[0]] = item[1]
    return proDict
Example 5: craweldhref
def craweldhref():
    db = DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
    href = db.getData(var='href')
    href = [item[0] for item in href]
    # Strip a trailing slash so hrefs compare consistently
    F = lambda x: x[:-1] if x[-1] == '/' else x
    href = [F(x) for x in href]
    print(len(href))
    return href
Example 6: companyInfo
def companyInfo():
    # Return company info as a dict keyed by company name
    db = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    data = db.getData(limit=200000)
    data = [item for item in data if item[2] != '-']
    comDict = {}
    for item in data:
        comDict[item[1]] = item[1:]
    return comDict
Example 7: gen_url
def gen_url():
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_3")
    url_detail_page = DB.getData(var="credit_detail_href", distinct=True)
    # Empty rows map to None so the truthiness check below filters them out
    urls = map(lambda x: x[0] if x else None, url_detail_page)
    url = []
    for t in urls:
        if t:
            url.append(t)
    return url
Example 8: run
def run(thread_count=1000):
    run_test(thread_count)
    db_server_c = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    db_server_c.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    res = []
    print('#' * 100)
    print(qu_proxy_ok.qsize())
    while qu_proxy_ok.qsize():
        res.append(qu_proxy_ok.get())
    db_server_c.data2DB(data=res)
Example 9: proxy_collection
def proxy_collection():
    # Get proxies from the website ...
    proxies_list_website = pc.get_proxies_from_website()
    # ... and, at the same time, other proxies from the local database
    table_names_proxies = 'proxy_other_source,proxy_you_dai_li'
    proxies_list_local = list()
    for proxies_t_n in table_names_proxies.split(','):
        dbs = DBService(dbName='base', tableName=proxies_t_n, **connect_dict)
        proxies_list_local += map(lambda x: x[0], dbs.getData(var='proxy_port'))
    return list(set(proxies_list_website + proxies_list_local))
Example 10: commentHrefList
def commentHrefList():
    db = DBService('elec_platform', 'tmall_baseinfo_everyweek')
    judgePageHref = db.getData(var='name,href,judgepage_href')
    # Drop rows whose judgepage_href contains 'http' or is purely numeric
    judgePageHref = [tuple(item) for item in judgePageHref if 'http' not in item[2]]
    judgePageHref = [item for item in judgePageHref if not item[2].isnumeric()]
    judgePageHref = list(set(judgePageHref))
    print(len(judgePageHref))
    return judgePageHref
Example 11: run
def run(thread_count=20000):
    muti_thread_test(thread_count)
    db_server_c = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    db_server_c.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    res = []
    while qu_proxy_ok.qsize():
        res.append([
            qu_proxy_ok.get(),
            time.strftime('%Y-%m-%d %X', time.localtime())
        ])
    db_server_c.data2DB(data=res)
Example 12: begin
def begin():
    db = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    data = db.getData()
    title = db.getTableTitle()[1:-2]
    # Deduplicate rows on the exported columns (the first and last two columns are dropped)
    S = set()
    for item in data:
        S.add(tuple(item[1:-2]))
    data = [list(item) for item in S]
    csv = CSV()
    csv.writeCsv(savePath='D:/spider', fileTitle=title, data=data, fileName='jdData')
Example 13: sumCommentCount
def sumCommentCount():
    db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddCommnetCount')
    # db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddtest')
    data = db.getData(var='shopName,commnetCount')
    # Sum the comment counts per shop (renamed from 'dict' to avoid shadowing the builtin)
    counts = {}
    for item in data:
        if item[0] in counts:
            counts[item[0]] += int(item[1])
        else:
            counts[item[0]] = int(item[1])
    data = [[k, v] for k, v in counts.items()]
    csv = CSV()
    csv.writeCsv(savePath='D:/spider', fileTitle=['shopName', 'commnetCount'], data=data, fileName='jdDataSum')
Example 14: startUrlList
def startUrlList(self):
    """
    Overridden method: build the list of start URLs.
    :return:
    """
    dbs = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    data = dbs.getData(var='productHref,sku', distinct=True)
    dataThirdPartBase = [item[0] for item in data if len(item[1]) >= 10]
    dataHadCrawled = DBService(dbName='jddata', tableName='thirdPartShopInfo').getData(var='productHref')
    if not dataHadCrawled:
        return dataThirdPartBase
    # Skip hrefs that were already crawled, and keep only absolute http(s) URLs
    dataHadCrawled = set([item[0] for item in dataHadCrawled])
    dataThirdPart = [item for item in dataThirdPartBase if item not in dataHadCrawled]
    dataThirdPart = [item for item in dataThirdPart if item[:4] == 'http']
    # print(len(dataThirdPart))
    return dataThirdPart
Example 15: savePicture
def savePicture():
    from screenShot import saveScreenShot
    from ms_spider_fw.DBSerivce import DBService
    import time
    import random
    db = DBService(dbName='tmalldata', tableName='tmall_baseinfo_realtime')
    data = db.getData(var='name,href', distinct=True)
    # Pair names with hrefs directly instead of looking names up by index,
    # which was O(n) per URL and wrong for duplicate hrefs
    pairs = [(item[0], item[1]) for item in data]
    print(len(pairs))
    dri = None
    for name, url in pairs:
        print(name)
        # Reuse the same driver instance across screenshots
        dri = saveScreenShot(url, driver=dri, title=name)
        # Randomized pause between requests
        time.sleep(abs(random.gauss(3, 2)))