本文整理汇总了 Python 中 ms_spider_fw.DBSerivce.DBService.createTable 方法的典型用法代码示例。如果您正苦于以下问题:Python DBService.createTable 方法的具体用法?Python DBService.createTable 怎么用?Python DBService.createTable 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 ms_spider_fw.DBSerivce.DBService 的用法示例。
在下文中一共展示了DBService.createTable方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: push2DB
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def push2DB():
    """Create the ``keyword`` table in ``taobaodata`` and store the keywords.

    The rows come from ``getKeyword()``; the table is created with three
    category columns before the rows are written with ``data2DB``.
    """
    # Function-scope import, as in the original snippet.
    from ms_spider_fw.DBSerivce import DBService

    keyword_rows = getKeyword()
    service = DBService('taobaodata', 'keyword')
    columns = ['categoryFi', 'categorySe', 'categoryTi']
    service.createTable(tableTitle=columns)
    service.data2DB(data=keyword_rows)
示例2: spiderMain
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def spiderMain():
    """Main program: start the downloader and store every parsed page.

    :return: never returns; the polling loop has no exit condition.
    """
    downloader = Dler()
    downloader.downLoad(100)

    store = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    columns = [
        'productHref', 'companyName', 'shopName', 'shopHref',
        'scoreSum', 'scoreProduct', 'scoreProductAvg',
        'scoreService', 'scoreServiceAvg',
        'scoreExpress', 'scoreExpressAvg', 'gradeHref',
    ]
    store.createTable(tableTitle=columns)

    while True:
        # Re-read the attribute on every pass, as the original loop does.
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing to process yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        parsed = PPer(src).pageParser()
        print(parsed[0])
        # One row per page: the URL followed by the parsed fields.
        store.data2DB(data=[url] + parsed)
示例3: run
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def run(thread_count=1000):
    """Run the proxy test and store everything queued in ``qu_proxy_ok``.

    :param thread_count: passed straight through to ``run_test``.
    """
    run_test(thread_count)
    db_server_c = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    db_server_c.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    res = []
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, and matches the call form used by the other snippets.
    print('#' * 100)
    print(qu_proxy_ok.qsize())
    # Drain the result queue into one list so it is written in a single call.
    while qu_proxy_ok.qsize():
        res.append(qu_proxy_ok.get())
    db_server_c.data2DB(data=res)
示例4: run
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def run(thread_count=20000):
    """Run the multi-threaded proxy test and store timestamped results.

    :param thread_count: passed straight through to ``muti_thread_test``.
    """
    muti_thread_test(thread_count)
    proxy_store = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    proxy_store.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')

    stamped_rows = []
    while qu_proxy_ok.qsize():
        proxy = qu_proxy_ok.get()
        # The timestamp is taken when the item is dequeued, one per row.
        dequeued_at = time.strftime('%Y-%m-%d %X', time.localtime())
        stamped_rows.append([proxy, dequeued_at])
    proxy_store.data2DB(data=stamped_rows)
示例5: dataGen
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def dataGen():
    """Join company rows with product info and store them in a new table.

    Every key of ``companyInfo()`` that also appears in ``productInfo()``
    yields one row: the company row with the product value appended. The
    rows are written to ``thirdPartShopInfoAddtest``, whose columns are the
    columns of ``thirdPartShopInfo`` plus ``commnetCount``.
    """
    company_rows = companyInfo()
    product_info = productInfo()

    # Named ``merged`` so the builtin ``dict`` is not shadowed.
    merged = {}
    for key, row in company_rows.items():
        # Direct membership test on the mapping instead of scanning .keys().
        if key in product_info:
            merged[key] = row + [product_info[key]]
    data = list(merged.values())

    db1 = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    title = db1.getTableTitle()
    # Column name kept exactly as spelled in the original.
    title = title + ['commnetCount']
    print(title)

    db2 = DBService(dbName='jddata', tableName='thirdPartShopInfoAddtest')
    db2.createTable(tableTitle=title)
    db2.data2DB(data=data)
示例6: spiderMain
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def spiderMain():
    """Main program: start the downloader and store every parsed page.

    :return: never returns; the polling loop has no exit condition.
    """
    downloader = Dler()
    downloader.downLoad(10)

    # Optional connection kwargs (host, user, passwd, charset) are not
    # passed here; only the database and table names are given.
    store = DBService(dbName='alibaba', tableName='alibaba_cow_powder_3')
    columns = [
        'company_name',
        'keyword',
        'sale',
        'href',
        'member_id',
        'offer_id',
        'cxt_year',
        'credit_detail_href',
        'goods_from',
        'product_title_sample',
        'product_detail_sample',
        'location',
        'url_base',
    ]
    store.createTable(tableTitle=columns)

    while True:
        # Re-read the attribute on every pass, as the original loop does.
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing to process yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        rows = PPer(src).pageParser()
        if not rows:
            print(u'--失败:%s' % url)
            continue
        # Append the source URL to every parsed row before storing.
        rows = [row + [url] for row in rows]
        store.data2DB(data=rows)
        print(u'++成功:%s' % url)
示例7: getCategoryAndStartUrl
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
def getCategoryAndStartUrl():
    """Fetch the category tree, queue the leaf entries and store all rows.

    The response is a JSONP payload whose ``data`` entry is walked four
    levels deep through the ``s`` children. Each ``n`` title is split on
    ``'|'``; a row holds parts 1 and 0 of the second-, third- and
    fourth-level titles, in that order. Parts 1 and 0 of every fourth-level
    title are also put on the global ``queue_for_url_targetBase``.

    :return: the list of six-item rows that was written to ``jdkeyword``.
    """
    import json
    global queue_for_url_targetBase
    queue_for_url_targetBase = Queue(0)

    src = myUrlOpen.requestByProxy('http://dc.3.cn/category/get?callback=getCategoryCallback')
    # Strip the JSONP wrapper: keep what follows the first '(' and drop the
    # final character.
    srcTemp = src.split('(', 1)[1][:-1]
    srcTemp = srcTemp.decode('gbk', 'ignore')
    srcJson = json.loads(srcTemp)['data']

    category = []
    for Fi in srcJson:
        for Se in Fi['s']:
            # Split each title once per node rather than once per use; the
            # parts are still indexed in the innermost loop, so a malformed
            # title fails at the same point as before.
            se_parts = Se['n'].split('|')
            for Ti in Se['s']:
                ti_parts = Ti['n'].split('|')
                for Fo in Ti['s']:
                    fo_parts = Fo['n'].split('|')
                    category.append([se_parts[1], se_parts[0],
                                     ti_parts[1], ti_parts[0],
                                     fo_parts[1], fo_parts[0]])
                    queue_for_url_targetBase.put((fo_parts[1], fo_parts[0]))

    db = DBService(dbName='jddata', tableName='jdkeyword')
    db.createTable(tableTitle=['category_fi_name', 'category_fi', 'category_se_name', 'category_se', 'category_ti_name',
                               'category_ti'])
    db.data2DB(data=category)
    return category
示例8: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
import Queue
# Queue created with maxsize 0, i.e. without a size limit.
json_file_queue = Queue.Queue(0)

# Direct pymysql connection to the ``platform_data`` database.
connect_jd = pymysql.connect(
    host='10.118.187.12',
    user='admin',
    passwd='admin',
    database='platform_data'
)

# Connection keyword arguments forwarded to DBService.
connect_dict = {
    'host': '10.118.187.12',
    'user': 'admin',
    'passwd': 'admin',
    'charset': 'utf8',
}

dbs = DBService(dbName='platform_data', tableName='jd_data_temp_0326', **connect_dict)
# Column names written out explicitly; this is the same list the original
# produced by splitting a comma-separated string and stripping each piece.
dbs.createTable(
    tableTitle=[
        'shop_name', 'addr', 'com_name', 'shop_href', 'cate_0',
        'score_summary', 'express_score', 'product_score', 'service_score',
        'product_href', 'vender_id', 'sku_id', 'size_count',
    ]
)
def get_min_max_id():
    """Return the smallest and largest ``id`` in ``jd_product_detail``.

    Both aggregates are read in a single statement, and the cursor is
    closed even when the query raises.

    :return: ``(min_id, max_id)`` taken from the first result row.
    """
    cur = connect_jd.cursor()
    try:
        cur.execute('SELECT MIN(id), MAX(id) FROM jd_product_detail')
        rows = cur.fetchall()
    finally:
        # Always release the cursor, including on a failed execute().
        cur.close()
    return rows[0][0], rows[0][1]
示例9: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
# --- configuration ---
db_name = 'platform_data'
table_name = 'suning'
# Comma-separated column names; split into a list when the table is created.
table_title = (
    'product_url,catalogue,sub_catalogue,product_title,promotion_desc,origin_price,price,'
    'product_stars,comment_count,sending_service,other_service,product_params,shop_name,'
    'shop_href,product_rating,product_rating_avg,serice_rating,service_rating_avg,express_rating,'
    'express_rating_avg,com_name_tel,crawl_time'
)
# Start URL for the crawl (string).
url_start = 'http://www.suning.com/emall/pgv_10052_10051_1_.html'
connect_dict = {
    'host': '10.118.187.12',
    'user': 'admin',
    'passwd': 'admin',
    'charset': 'utf8',
}

# --- script ---
db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
# Create the result table only when it is not there yet.
if not db_server.isTableExist():
    db_server.createTable(tableTitle=table_title.split(','))
class Handler(BaseHandler):
    """Crawl handler that starts at ``url_start`` and follows matched links."""

    # No crawl options are overridden.
    crawl_config = {}

    @every(minutes=24 * 60)
    def on_start(self):
        """Request the start page and hand the response to ``step_first``."""
        self.crawl(url_start, callback=self.step_first)

    @config(age=2 * 24 * 60 * 60)
    def step_first(self, response):
        """Schedule ``step_second`` for every link matched by the selector.

        ``step_second`` is not part of this excerpt.
        """
        links = response.doc('.listLeft>dl>dd>span>a')
        for link in links.items():
            self.crawl(link.attr.href, callback=self.step_second)
示例10: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
# Database, result tables and their comma-separated column names.
db_name = 'address'
table_name_1 = 'ershoufang_58city_baseinfo'
table_name_2 = 'ershoufang_58city_detail'
table_title_1 = 'detail,crawl_time'
table_title_2 = 'url,detail,crawl_time'
url_start = 'http://www.58.com/ershoufang/changecity/'

# Connection settings; usually no need to modify.
connect_dict = {
    'host': '10.118.187.12',
    'user': 'admin',
    'passwd': 'admin',
    'charset': 'utf8',
}

db_server_1 = DBService(dbName=db_name, tableName=table_name_1, **connect_dict)
db_server_2 = DBService(dbName=db_name, tableName=table_name_2, **connect_dict)

# Create each result table in MySQL only when it does not exist yet;
# normally this part does not need to be changed.
if not db_server_1.isTableExist():
    db_server_1.createTable(tableTitle=table_title_1.split(','))
if not db_server_2.isTableExist():
    db_server_2.createTable(tableTitle=table_title_2.split(','))
# Raw string literals keep the regex backslashes intact without relying on
# Python passing unknown escapes (\d, \s, \;) through unchanged; the compiled
# patterns are identical to the originals.
pat_num = re.compile(r'\d+')  # one or more digits
pat_replace_space = re.compile(r'\s+?')  # lazy: matches one whitespace char at a time
pat_comment = re.compile(r'var arr=(.+?)\;')  # captures the text between 'var arr=' and the next ';'
class Handler(BaseHandler):
crawl_config = {
# 'proxy': '10.10.10.10:80',
'headers': {
'User-Agent': 'User-Agent:Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) '
'Gecko/20100101 Firefox/4.0.1'
示例11: DBService
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
"""
from pyspider.libs.base_handler import *
from ms_spider_fw.DBSerivce import DBService
import time
db_server = DBService(
    dbName='b2c_base',
    tableName='b2c_website_list_meidebi',
    host='10.118.187.12',
    user='admin',
    passwd='admin',
    charset='utf8',
)
# Create the MySQL table that stores the crawl results.
db_server.createTable(
    tableTitle=[
        'name', 'summary', 'url',
        'evaluation_num', 'total_score', 'quality_score',
        'express_service_score', 'customer_service_score',
        'crawl_time',
    ]
)
class Handler(BaseHandler):
    """Crawl handler whose entry point requests the meidebi company page."""

    # No crawl options are overridden.
    crawl_config = {}

    @every(minutes=30 * 24 * 60)
    def on_start(self):
        """Request the company page and hand the response to ``step_first``.

        ``step_first`` is not part of this excerpt.
        """
        start_url = 'http://www.meidebi.com/company/'
        self.crawl(start_url, callback=self.step_first)
示例12: extract_info
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
"productsize",
"integral",
"anonymousflag",
"userlevelname",
"recommend",
"userclientshow",
"ismobile",
"negwords",
"negwordsnum",
"goodwords",
"goodwordsnum",
"days",
"industry"
]
# Create the result table only when it is not there yet.
if not db_server.isTableExist():
    db_server.createTable(tableTitle=table_title)

# Earlier variant, kept for reference: re.compile('<.+?>')
# Current alternatives: a literal marker, #...# spans, @... up to whitespace
# or a colon, [...] spans, @... up to end of string, and single whitespace
# characters.
re_sub_p = re.compile(u'回复|#.+?#|@.+?[\s::]|\[.+?\]|@.+$|\s+?')
res = []
def extract_info(x):
try:
d_t = json.loads(x)
d = d_t['comments']
except Exception, e:
# raise ValueError('No comments exists!')
return None
if isinstance(d, list):
示例13: PPer
# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import createTable [as 别名]
# if not que.empty():
# url, src = que.get()
# pPer = PPer(src)
# temp = pPer.pageParser()
# if temp:
# temp = map(lambda x: x + [url], temp)
# # DB.data2DB(data=temp)
# print(u'++成功:%s' % url)
# else:
# print(u'--失败:%s' % url)
# else:
# time.sleep(1)
if __name__ == '__main__':
    # Script entry point: create the contact table, then run temp_main().
    DB = DBService(dbName='alibaba', tableName='alibaba_cow_powder_phone')
    DB.createTable(tableTitle=[
        'contacts_name', 'contacts_sex', 'contacts_job',
        'cell_phone', 'tel_phone', 'fax_phone',
        'shop_addr', 'spider_time', 'url',
    ])
    temp_main()