本文整理汇总了Python中ms_spider_fw.DBSerivce.DBService.getData方法的典型用法代码示例。如果您正苦于以下问题:Python DBService.getData方法的具体用法?Python DBService.getData怎么用?Python DBService.getData使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类ms_spider_fw.DBSerivce.DBService的用法示例。
在下文中一共展示了DBService.getData方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: productInfo
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def productInfo():
    """Return a dict mapping each product href to its comment count.

    Reads up to 200000 rows from jddata.jdproductbaseinfo2database.
    """
    db = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    rows = db.getData(var='productHref,commentCount', limit=200000)
    return {row[0]: row[1] for row in rows}
示例2: gen_url
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def gen_url():
    """Build the de-duplicated list of contact-info page URLs for stored shops.

    Regional sub-domains (pt/ru/es) are folded onto the main www site first;
    hrefs already ending in '.html' (or on unknown hosts) are dropped.
    """
    def to_contact_url(href):
        # Already a concrete .html page: nothing to derive.
        if '.html' in href:
            return None
        base, tail = href.rsplit('/', 1)
        return base + '/contactinfo/' + tail + '.html'

    def normalize(href):
        # Check regions in the same priority order as the original chain;
        # '//www' maps onto itself, the rest are rewritten to '//www'.
        for region in ('//www', '//pt', '//ru', '//es'):
            if region in href:
                return to_contact_url(href.replace(region, '//www'))
        return None

    db_g = DBService(dbName=db_name, tableName='aliexpress_temp', **connect_dict)
    rows = db_g.getData(var='store_href', distinct=True)
    candidates = [normalize(row[0]) for row in rows]
    return list(set(c for c in candidates if c))
示例3: craweldhref
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def craweldhref():
    """Return crawled shop hrefs with a single trailing '/' removed.

    Fixes two defects in the original: `x[-1]` raised IndexError on an
    empty href, and under Python 3 `len()` of a `map` object raises
    TypeError — a list comprehension works on both interpreters.
    """
    db = DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
    rows = db.getData(var='href')
    # endswith() is safe on the empty string, unlike indexing x[-1].
    href = [row[0][:-1] if row[0].endswith('/') else row[0] for row in rows]
    print(len(href))  # progress/visibility only
    return href
示例4: gen_url
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def gen_url():
    """Return the distinct, non-empty credit-detail hrefs from the alibaba table."""
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_3")
    rows = DB.getData(var="credit_detail_href", distinct=True)
    # A falsy row is substituted with " " (which is truthy and therefore kept),
    # while a truthy row is kept only when its first column is non-empty —
    # exactly the original map-then-filter behavior.
    return [row[0] if row else " " for row in rows if not row or row[0]]
示例5: companyInfo
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def companyInfo():
    """Return company info keyed by the second column, as a dict.

    Rows whose third column is '-' carry no usable data and are skipped.
    """
    db = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = db.getData(limit=200000)
    return {row[1]: row[1:] for row in rows if row[2] != '-'}
示例6: commentHrefList
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def commentHrefList():
    """Return unique (name, href, judgepage_href) rows whose judge-page
    link is relative (no 'http') and not purely numeric."""
    db = DBService('elec_platform', 'tmall_baseinfo_everyweek')
    rows = db.getData(var='name,href,judgepage_href')
    # Combine the original two filter passes into one comprehension.
    kept = [
        tuple(row) for row in rows
        if 'http' not in row[2] and not row[2].isnumeric()
    ]
    unique_rows = list(set(kept))
    print(len(unique_rows))
    return unique_rows
示例7: proxy_collection
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def proxy_collection():
    """Merge proxies scraped from the web with those stored in local DB tables,
    de-duplicated."""
    website_proxies = pc.get_proxies_from_website()
    local_proxies = []
    for table in ('proxy_other_source', 'proxy_you_dai_li'):
        dbs = DBService(dbName='base', tableName=table, **connect_dict)
        local_proxies.extend(row[0] for row in dbs.getData(var='proxy_port'))
    return list(set(website_proxies + local_proxies))
示例8: begin
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def begin():
    """Dump de-duplicated shop rows (leading id and two trailing columns
    stripped) to D:/spider/jdData.csv."""
    db = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = db.getData()
    title = db.getTableTitle()[1:-2]
    # Tuples are hashable, so a set de-duplicates the trimmed rows.
    unique_rows = set(tuple(row[1:-2]) for row in rows)
    data = [list(row) for row in unique_rows]
    csv = CSV()
    csv.writeCsv(savePath='D:/spider', fileTitle=title, data=data, fileName='jdData')
示例9: sumCommentCount
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def sumCommentCount():
    """Sum comment counts per shop and write the totals to jdDataSum.csv.

    The commnetCount column (sic — the DB column is really spelled that way)
    is stored as strings, so each value is converted with int() before summing.

    Fix: the original named its accumulator `dict`, shadowing the builtin;
    renamed and the two-branch insert collapsed into a single `.get` call.
    """
    db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddCommnetCount')
    rows = db.getData(var='shopName,commnetCount')
    totals = {}
    for row in rows:
        totals[row[0]] = totals.get(row[0], 0) + int(row[1])
    data = [[shop, total] for shop, total in totals.items()]
    csv = CSV()
    csv.writeCsv(savePath='D:/spider', fileTitle=['shopName', 'commnetCount'], data=data, fileName='jdDataSum')
示例10: startUrlList
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def startUrlList(self):
    """
    Method override: seed URLs for the crawler.
    Product hrefs with a plausible sku (len >= 10) that have not yet been
    written to thirdPartShopInfo; only http(s) hrefs are kept.
    :return: list of hrefs still to crawl
    """
    base_db = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    rows = base_db.getData(var='productHref,sku', distinct=True)
    candidates = [row[0] for row in rows if len(row[1]) >= 10]
    crawled_rows = DBService(dbName='jddata', tableName='thirdPartShopInfo').getData(var='productHref')
    # Nothing crawled yet: everything is a seed (same early-out as original).
    if not crawled_rows:
        return candidates
    crawled = set(row[0] for row in crawled_rows)
    return [href for href in candidates
            if href not in crawled and href[:4] == 'http']
示例11: savePicture
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def savePicture():
    """Screenshot every distinct (name, href) shop page, reusing one driver.

    Fix: the original recovered the shop name with `data.index(url)`, which
    is O(n) per item and returns the FIRST matching index, so duplicate
    hrefs were paired with the wrong name; it also relied on `map`
    returning a list (Python 2 only). Iterating (name, url) pairs directly
    removes both problems.
    """
    from screenShot import saveScreenShot
    from ms_spider_fw.DBSerivce import DBService
    import time
    import random
    db = DBService(dbName='tmalldata', tableName='tmall_baseinfo_realtime')
    rows = list(db.getData(var='name,href', distinct=True))
    print(len(rows))
    dri = None
    for name, url in rows:
        print(name)
        # Reuse the driver returned by the previous call to avoid re-launching.
        dri = saveScreenShot(url, driver=dri, title=name)
        # Randomized pause (gaussian, mean ~3s) between page loads.
        time.sleep(abs(random.gauss(3, 2)))
示例12: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
# NOTE(review): remote MySQL kept for reference; the local instance is active.
# connect_dict = {'host': '10.118.187.12', 'user': 'admin', 'passwd': 'admin', 'charset': 'utf8'}
connect_dict = {'host': 'localhost', 'user': 'root', 'passwd': '', 'charset': 'utf8'}
# Earlier single-table variant, kept for reference:
# db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
# proxy_list = map(lambda x: x[0], db_server.getData(var='proxy_port', distinct=True))
# for p in proxy_list:
#     qu_proxy_test.put(p)
# Matches a bare dotted-quad IPv4 address not embedded in a longer number.
patt_ip = re.compile(r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])')
proxy_list = []
# Collect proxy endpoints from every configured table that actually exists.
# (Python 2 print statement — this snippet predates Python 3.)
for table_name in table_name_s.split(','):
    print table_name
    db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
    if db_server.isTableExist():
        proxy_list += map(lambda x: x[0], db_server.getData(var='proxy_port'))
# De-duplicate before queueing candidates for testing.
proxy_list_t=list(set(proxy_list))
for p in proxy_list_t:
    qu_proxy_test.put(p)
def original_ip_address():
    """Return this host's public egress IP as reported by httpbin.org."""
    body = requests.get('http://httpbin.org/ip').text
    return json.loads(body).get('origin')
# Baseline address, captured before any proxy is applied.
original = original_ip_address()
def test():
示例13: reload
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from ms_spider_fw.DBSerivce import DBService
import json
import re
import requests
import sys
from datetime import datetime
# Python 2 hack: force utf-8 as the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf8')
db_server = DBService(dbName='test', tableName='weibo_cellphone')  # , **connect_dict)
data = db_server.getData(var='detail_json', limit=20)
# Keep only rows whose first column is non-empty and starts like a JSON object.
data = filter(lambda x: 1 if x[0][0] == '{' else 0, filter(lambda x: 1 if x[0] else 0, data))
re_sub_p = re.compile('<.+?>')  # strips HTML-tag-like spans
re_sub_t = re.compile('\+\d+?\s')  # matches a "+NNNN "-style numeric offset token
def time_format(ori):
    """Reformat a '%a %b %d %H:%M:%S %Y' timestamp (with a '+NNNN ' token
    removed first) into '%Y-%m-%d %H:%M:%S'; empty input yields ''."""
    if not ori:
        return ''
    cleaned = re_sub_t.sub('', ori)
    parsed = datetime.strptime(cleaned, '%a %b %d %H:%M:%S %Y')
    return parsed.strftime('%Y-%m-%d %H:%M:%S')
# extract_info from json string
def extract_info(x):
示例14: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
import threading
import time
from Queue import Queue as qu
from ms_proxy import proxy_test
from ms_spider_fw.DBSerivce import DBService
# --- configuration ---
db_name = 'b2c_base'
# Tables holding proxy lists to test; combine several names with ','.
table_name_s = 'proxy_you_dai_li,proxy_xi_ci_dai_li'
connect_dict = {'host': '10.118.187.12', 'user': 'admin', 'passwd': 'admin', 'charset': 'utf8'}
proxy_list = []
# Pull the distinct proxy endpoints out of every configured table.
for table_name in table_name_s.split(','):
    db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
    proxy_list += map(lambda x: x[0], db_server.getData(var='proxy_port', distinct=True))
# Offline alternative: load the proxy list from a local text file instead.
# with open("d:/proxy_2.txt", 'r')as f:
#     t = f.read()
#     proxy_list = t.split('\n')
# --- script state ---
qu_proxy_test = qu(0)  # unbounded queue of candidates awaiting the check
qu_proxy_ok = qu(0)  # unbounded queue of proxies that passed
# De-duplicate before queueing.
for t in set(proxy_list):
    qu_proxy_test.put(t)
def test():
while qu_proxy_test.qsize():
示例15: int
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
#coding:utf8
__author__ = '613108'
from ms_spider_fw.DBSerivce import DBService
dbs=DBService(dbName='elec_platform',tableName='tmall_baseinfo_everyweek')
data=dbs.getData()
# Keep only rows whose numeric second-to-last column is at least 35.
# (presumably a count/score threshold — TODO confirm against the table schema)
data=[item for item in data if int(item[-2])>=35]
print(len(data))