当前位置: 首页>>代码示例>>Python>>正文


Python DBService.getData方法代码示例

本文整理汇总了Python中ms_spider_fw.DBSerivce.DBService.getData方法的典型用法代码示例。如果您正苦于以下问题：Python DBService.getData方法的具体用法？Python DBService.getData怎么用？Python DBService.getData使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类ms_spider_fw.DBSerivce.DBService的用法示例。


在下文中一共展示了DBService.getData方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: productInfo

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def productInfo():
    """Build a lookup from the first to the second fetched column.

    Queries ``jddata.jdproductbaseinfo2database`` for
    ``productHref,commentCount`` (passing ``limit=200000``) and uses
    ``row[0]`` as key and ``row[1]`` as value -- presumably the product href
    and its comment count, in the order requested.

    :return: dict; when a key occurs in several rows the last row wins.
    """
    service = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    rows = service.getData(var='productHref,commentCount', limit=200000)
    return dict((row[0], row[1]) for row in rows)
开发者ID:yangmingsong,项目名称:python,代码行数:9,代码来源:jdDataAna.py

示例2: gen_url

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def gen_url():
    """Return the de-duplicated contact-info URLs derived from stored shop hrefs.

    Distinct ``store_href`` values are read from the ``aliexpress_temp``
    table. Each href that contains one of the known host markers is rewritten
    onto ``//www`` and turned into ``<parent>/contactinfo/<last segment>.html``.
    Hrefs without a known marker, or already containing ``.html``, are dropped.

    :return: list of unique URL strings (order is that of a ``set``).
    """
    www_marker = '//www'
    # Checked in this order; the first marker found in the href decides.
    host_markers = (www_marker, '//pt', '//ru', '//es')

    def to_contact_url(href):
        # An href that already mentions '.html' is not converted.
        if '.html' in href:
            return None
        pieces = href.rsplit('/', 1)
        return pieces[0] + '/contactinfo/' + pieces[1] + '.html'

    def normalise(href):
        for marker in host_markers:
            if marker in href:
                # Replacing '//www' with itself leaves the href unchanged.
                return to_contact_url(href.replace(marker, www_marker))
        return None

    service = DBService(dbName=db_name, tableName='aliexpress_temp', **connect_dict)
    rows = service.getData(var='store_href', distinct=True)
    candidates = [normalise(row[0]) for row in rows]
    return list(set(url for url in candidates if url))
开发者ID:yangmingsong,项目名称:python,代码行数:30,代码来源:contact_info_aliexpress_nops_addcookies.py

示例3: craweldhref

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def craweldhref():
    """Return the stored hrefs with a single trailing '/' removed.

    Reads the ``href`` column from
    ``elec_platform.yms_tmall_shopinfo_com_withoutjudge``, strips at most one
    trailing slash from each value, prints how many hrefs were found and
    returns them.

    :return: list of href strings, in the order returned by the query.
    """
    db = DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
    rows = db.getData(var='href')
    # endswith() is safe on an empty string, whereas indexing x[-1] raises
    # IndexError. Building a real list keeps len() working on Python 3, where
    # map() returns an iterator.
    hrefs = [row[0][:-1] if row[0].endswith('/') else row[0] for row in rows]
    print(len(hrefs))
    return hrefs
开发者ID:yangmingsong,项目名称:python,代码行数:10,代码来源:spider.py

示例4: gen_url

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def gen_url():
    """Return the distinct, non-empty ``credit_detail_href`` values.

    Reads the column from ``alibaba.alibaba_cow_powder_3``. Empty rows and
    rows whose href is falsy (``None`` or ``''``) are skipped. The earlier
    version substituted ``" "`` for empty rows, and because that placeholder
    is truthy it slipped through the emptiness check as a bogus URL.

    :return: list of href strings, in the order returned by the query.
    """
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_3")
    rows = DB.getData(var="credit_detail_href", distinct=True)
    return [row[0] for row in rows if row and row[0]]
开发者ID:yangmingsong,项目名称:python,代码行数:11,代码来源:spider_third_page.py

示例5: companyInfo

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def companyInfo():
    """Return company information as a dict.

    Fetches rows from ``jddata.thirdPartShopInfo`` (passing ``limit=200000``),
    ignores rows whose third field equals ``'-'`` and maps ``row[1]`` to the
    slice ``row[1:]``.

    :return: dict keyed by ``row[1]``; later rows overwrite earlier ones.
    """
    service = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = service.getData(limit=200000)
    return dict((row[1], row[1:]) for row in rows if not row[2] == '-')
开发者ID:yangmingsong,项目名称:python,代码行数:11,代码来源:jdDataAna.py

示例6: commentHrefList

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def commentHrefList():
    """Return the unique (name, href, judgepage_href) rows still lacking a usable link.

    Rows are read from ``elec_platform.tmall_baseinfo_everyweek``. A row is
    kept only when its third field neither contains ``'http'`` nor is purely
    numeric. Duplicates are removed via a ``set`` and the count is printed.

    :return: list of tuples (order is that of a ``set``).
    """
    service = DBService('elec_platform', 'tmall_baseinfo_everyweek')
    rows = service.getData(var='name,href,judgepage_href')
    unique_rows = set(
        tuple(row) for row in rows
        if 'http' not in row[2] and not row[2].isnumeric()
    )
    result = list(unique_rows)
    print(len(result))
    return result
开发者ID:yangmingsong,项目名称:python,代码行数:12,代码来源:spider.py

示例7: proxy_collection

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def proxy_collection():
    """Merge proxies scraped from the website with those stored locally.

    The website proxies come from ``pc.get_proxies_from_website()`` (assumed
    to return a list, since it is concatenated with one). The local ones are
    the ``proxy_port`` column of each listed table in the ``base`` database.

    :return: list of unique proxies (order is that of a ``set``).
    """
    # Fetch the website proxies first, as before, then read the database.
    from_website = pc.get_proxies_from_website()
    from_database = []
    for table in 'proxy_other_source,proxy_you_dai_li'.split(','):
        service = DBService(dbName='base', tableName=table, **connect_dict)
        from_database.extend(row[0] for row in service.getData(var='proxy_port'))
    return list(set(from_website + from_database))
开发者ID:yangmingsong,项目名称:python,代码行数:12,代码来源:contact_info_aliexpress_nops_addcookies.py

示例8: begin

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def begin():
    """Export the de-duplicated ``thirdPartShopInfo`` rows to a CSV file.

    The first column and the last two columns are cut from both the table
    title and every row (slice ``[1:-2]``). Identical remaining rows are
    collapsed, and the result is written through ``CSV.writeCsv`` with
    ``savePath='D:/spider'`` and ``fileName='jdData'``.
    """
    service = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = service.getData()
    header = service.getTableTitle()[1:-2]
    # Tuples are hashable, so a set removes duplicate rows.
    unique_rows = set(tuple(row[1:-2]) for row in rows)
    records = [list(row) for row in unique_rows]
    CSV().writeCsv(savePath='D:/spider', fileTitle=header, data=records, fileName='jdData')
开发者ID:yangmingsong,项目名称:python,代码行数:14,代码来源:jdDataAna.py

示例9: sumCommentCount

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def sumCommentCount():
    """Sum the comment counts per shop and write the totals to a CSV file.

    Reads ``shopName,commnetCount`` from
    ``jddata.thirdPartShopInfoAddCommnetCount``, adds up ``int(count)`` for
    every shop name and writes ``[shopName, total]`` rows through
    ``CSV.writeCsv`` with ``savePath='D:/spider'`` and
    ``fileName='jdDataSum'``.
    """
    db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddCommnetCount')
    rows = db.getData(var='shopName,commnetCount')
    # A plain dict with .get() avoids shadowing the builtin ``dict`` and the
    # linear ``in dict.keys()`` scan that Python 2 performs for every row.
    totals = {}
    for row in rows:
        totals[row[0]] = totals.get(row[0], 0) + int(row[1])
    data = [[name, total] for name, total in totals.items()]
    writer = CSV()
    writer.writeCsv(savePath='D:/spider', fileTitle=['shopName', 'commnetCount'], data=data, fileName='jdDataSum')
开发者ID:yangmingsong,项目名称:python,代码行数:17,代码来源:jdDataAna.py

示例10: startUrlList

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
 def startUrlList(self):
     """
     Method override: list the product hrefs that still have to be crawled.

     Candidates are the distinct ``productHref`` values of
     ``jddata.jdproductbaseinfo2database`` whose ``sku`` has at least 10
     characters. Hrefs already present in ``jddata.thirdPartShopInfo`` are
     removed, and only hrefs starting with ``'http'`` are returned.

     The ``'http'`` filter now also applies when nothing has been crawled yet.
     Previously that case returned early and skipped the filter, so the first
     run could return hrefs that every later run would reject.

     :return: list of href strings
     """
     dbs = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
     data = dbs.getData(var='productHref,sku', distinct=True)
     candidates = [item[0] for item in data if len(item[1]) >= 10]
     crawled_rows = DBService(dbName='jddata', tableName='thirdPartShopInfo').getData(var='productHref')
     # A falsy result (empty or None) simply means nothing is excluded.
     crawled = set(item[0] for item in crawled_rows) if crawled_rows else set()
     return [href for href in candidates
             if href not in crawled and href[:4] == 'http']
开发者ID:yangmingsong,项目名称:python,代码行数:18,代码来源:spider.py

示例11: savePicture

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
def savePicture():
    """Take a screenshot of every distinct (name, href) pair in the table.

    Reads ``name,href`` from ``tmalldata.tmall_baseinfo_realtime`` and calls
    ``saveScreenShot`` once per row, passing the driver returned by the
    previous call into the next one. Between calls it sleeps for
    ``abs(random.gauss(3, 2))`` seconds.

    Name and href are iterated together as pairs. The earlier
    ``nameD[data.index(url)]`` lookup was quadratic, returned the first
    matching name whenever an href occurred more than once, and relied on
    ``map`` returning a list (Python 2 only).
    """
    from screenShot import saveScreenShot
    from ms_spider_fw.DBSerivce import DBService
    import time
    import random

    db = DBService(dbName='tmalldata', tableName='tmall_baseinfo_realtime')
    rows = db.getData(var='name,href', distinct=True)
    pairs = [(row[0], row[1]) for row in rows]
    print(len(pairs))
    dri = None
    for name, url in pairs:
        print(name)
        # saveScreenShot returns the driver to hand to the next call.
        dri = saveScreenShot(url, driver=dri, title=name)
        # Random pause between page loads.
        time.sleep(abs(random.gauss(3, 2)))
开发者ID:yangmingsong,项目名称:python,代码行数:19,代码来源:scrennShot_test.py

示例12: DBService

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
# Alternative (remote) connection settings, kept for reference:
# connect_dict = {'host': '10.118.187.12', 'user': 'admin', 'passwd': 'admin', 'charset': 'utf8'}
connect_dict = {'host': 'localhost', 'user': 'root', 'passwd': '', 'charset': 'utf8'}

# Earlier single-table variant, kept for reference:
# db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
# proxy_list = map(lambda x: x[0], db_server.getData(var='proxy_port', distinct=True))
# for p in proxy_list:
#     qu_proxy_test.put(p)

# Dotted-quad pattern that may not be preceded or followed by a digit or dot.
patt_ip = re.compile(r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])')
proxy_list = []

# Collect the proxy_port column of every listed table that actually exists.
for table_name in table_name_s.split(','):
    # print(x) with one argument behaves the same on Python 2 and Python 3,
    # whereas the bare ``print x`` statement is a syntax error on Python 3.
    print(table_name)
    db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
    if db_server.isTableExist():
        proxy_list.extend(row[0] for row in db_server.getData(var='proxy_port'))

# De-duplicate, then queue every proxy for testing.
proxy_list_t = list(set(proxy_list))
for p in proxy_list_t:
    qu_proxy_test.put(p)


def original_ip_address(timeout=10):
    """Return the ``origin`` field reported by ``http://httpbin.org/ip``.

    :param timeout: seconds passed to ``requests.get``. Without a timeout the
        request can block forever if the service does not answer.
    :return: the value of ``origin`` in the JSON reply, or ``None`` when the
        key is missing.
    :raises requests.exceptions.RequestException: on network failure or timeout.
    """
    t = requests.get('http://httpbin.org/ip', timeout=timeout).text
    return json.loads(t).get('origin')


# Evaluated once when the module is loaded: stores whatever
# original_ip_address() returns (the 'origin' value reported by httpbin).
original = original_ip_address()


def test():
开发者ID:yangmingsong,项目名称:python,代码行数:33,代码来源:httpbin_proxy_test.py

示例13: reload

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

from ms_spider_fw.DBSerivce import DBService
import json
import re
import requests
import sys
from datetime import datetime

# Reload the sys module, then set the default encoding to utf8.
# NOTE(review): presumably the Python 2 idiom for regaining access to
# sys.setdefaultencoding -- confirm the target interpreter.
reload(sys)
sys.setdefaultencoding('utf8')

# Fetch the detail_json column (limit=20) from test.weibo_cellphone.
# The explicit connection settings are commented out on this call.
db_server = DBService(dbName='test', tableName='weibo_cellphone')  # , **connect_dict)
data = db_server.getData(var='detail_json', limit=20)
# The inner filter drops rows whose first field is falsy; the outer filter then
# keeps only rows whose first field starts with '{' (presumably JSON objects --
# TODO confirm).
data = filter(lambda x: 1 if x[0][0] == '{' else 0, filter(lambda x: 1 if x[0] else 0, data))

# Raw strings keep the backslashes literal; the non-raw originals relied on
# invalid escape sequences (\+, \d, \s), which newer Python versions warn about.
# Matches one "<...>" span, non-greedy.
re_sub_p = re.compile(r'<.+?>')
# Matches "+digits" followed by one whitespace character, e.g. "+0800 ".
re_sub_t = re.compile(r'\+\d+?\s')


def time_format(ori):
    """Convert a timestamp such as ``'Tue May 31 17:46:55 +0800 2011'``.

    The ``+NNNN`` offset (with its trailing whitespace) is removed, the rest
    is parsed with ``'%a %b %d %H:%M:%S %Y'`` and re-emitted as
    ``'%Y-%m-%d %H:%M:%S'``. The offset is discarded, not applied.

    :param ori: source timestamp string; a falsy value yields ``''``.
    :return: the reformatted timestamp string.
    :raises ValueError: if the remaining text does not match the expected format.
    """
    if not ori:
        return ''
    o = re_sub_t.sub('', ori)
    s = datetime.strptime(o, '%a %b %d %H:%M:%S %Y')
    return s.strftime('%Y-%m-%d %H:%M:%S')


# extract_info from json string
def extract_info(x):
开发者ID:yangmingsong,项目名称:python,代码行数:33,代码来源:post_data_test.py

示例14: DBService

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
import threading
import time
from Queue import Queue as qu

from ms_proxy import proxy_test
from ms_spider_fw.DBSerivce import DBService

# --- configuration ---
db_name = 'b2c_base'
# Comma-separated names of the tables whose proxies should be tested.
table_name_s = 'proxy_you_dai_li,proxy_xi_ci_dai_li'
connect_dict = {'host': '10.118.187.12', 'user': 'admin', 'passwd': 'admin', 'charset': 'utf8'}

# Gather the distinct proxy_port values of every configured table.
proxy_list = []
for table_name in table_name_s.split(','):
    db_server = DBService(dbName=db_name, tableName=table_name, **connect_dict)
    proxy_list.extend(row[0] for row in db_server.getData(var='proxy_port', distinct=True))

# Alternative source (disabled): read the proxies from a text file instead.
# with open("d:/proxy_2.txt", 'r')as f:
#     t = f.read()
# proxy_list = t.split('\n')

# --- script ---
# Two queues created with maxsize 0: proxies waiting to be tested, and
# (judging by the name) proxies that passed.
qu_proxy_test = qu(0)
qu_proxy_ok = qu(0)

# Each unique proxy is queued exactly once.
for t in set(proxy_list):
    qu_proxy_test.put(t)


def test():
    while qu_proxy_test.qsize():
开发者ID:yangmingsong,项目名称:python,代码行数:33,代码来源:proxy_ok(daily).py

示例15: int

# 需要导入模块: from ms_spider_fw.DBSerivce import DBService [as 别名]
# 或者: from ms_spider_fw.DBSerivce.DBService import getData [as 别名]
#coding:utf8
__author__ = '613108'
from ms_spider_fw.DBSerivce import DBService

# Load every row of elec_platform.tmall_baseinfo_everyweek, keep those whose
# second-to-last field is at least 35 as an integer, then print how many remain.
dbs = DBService(dbName='elec_platform', tableName='tmall_baseinfo_everyweek')
data = [item for item in dbs.getData() if not int(item[-2]) < 35]
print(len(data))
开发者ID:yangmingsong,项目名称:python,代码行数:9,代码来源:tmallDataWeekly.py


注:本文中的ms_spider_fw.DBSerivce.DBService.getData方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。