當前位置: 首頁>>代碼示例>>Python>>正文


Python SQLContext.clearCache方法代碼示例

本文整理匯總了Python中pyspark.SQLContext.clearCache方法的典型用法代碼示例。如果您正苦於以下問題:Python SQLContext.clearCache方法的具體用法?Python SQLContext.clearCache怎麽用?Python SQLContext.clearCache使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pyspark.SQLContext的用法示例。


在下文中一共展示了SQLContext.clearCache方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from pyspark import SQLContext [as 別名]
# 或者: from pyspark.SQLContext import clearCache [as 別名]

#.........這裏部分代碼省略.........
        aum_now = self.sqlctx.sql(aum_now_sql)
        # 清除緩存表
        self.sqlctx.dropTempTable('group_in')

        # 聯合
        union_season_aumnow = union_season.join(aum_now, 'CUST_NO', 'outer')

        # 計算用戶開戶至今時間(months)
        # 載入賬戶表
        account = self.load_from_mysql('t_CMMS_ACCOUNT_LIST').cache()
        account.select('CUST_NO', 'OPEN_DAT').registerTempTable('account')
        account_age_aql = "select  CUST_NO, first(ACCOUNT_AGE) as ACCOUNT_AGE  from " \
                          "(select CUST_NO, round(datediff(now(), OPEN_DAT) / 30) as ACCOUNT_AGE " \
                          "from account order by CUST_NO, ACCOUNT_AGE desc ) as t group by CUST_NO"

        account_age = self.sqlctx.sql(account_age_aql)

        # calculate last tran date
        account_1 = account.select('CUST_NO', 'ACC_NO15')
        detail = self.load_from_mysql('t_CMMS_ACCOUNT_DETAIL').select('ACC_NO15', 'TRAN_DAT')
        a_d = account_1.join(detail, 'ACC_NO15', 'outer')
        a_d.filter(a_d.CUST_NO != '').registerTempTable('adtable')

        last_tr_date_sql = "select CUST_NO,first(TRAN_DAT) as LAST_TR_DATE from (select CUST_NO,TRAN_DAT from adtable order by TRAN_DAT desc) as t group by CUST_NO"

        last_tr_date = self.sqlctx.sql(last_tr_date_sql)

        # 聯合 season   aum_now    account_age     last_tr_date
        unions = union_season_aumnow.join(account_age, 'CUST_NO', 'outer').join(last_tr_date, 'CUST_NO', 'outer')

        # 清除緩存表
        self.sqlctx.dropTempTable('account')
        self.sqlctx.dropTempTable('adtable')
        self.sqlctx.clearCache()

        # 結果插入表
        print('結果插入臨時表:t_CMMS_TEMP_LIFECYCLE...')
        insert_lifecycle_sql = "replace into t_CMMS_TEMP_LIFECYCLE(CUST_NO,SAUM1,SAUM2,INCREASE,ACCOUNT_AGE,AUM_NOW,LAST_TR_DATE) values(%s,%s,%s,%s,%s,%s,%s)"

        # 緩衝區
        temp = []
        for row in unions.collect():
            row_dic = row.asDict()

            if len(temp) >= 1000:  # 批量寫入數據庫
                self.mysql_helper.executemany(insert_lifecycle_sql, temp)
                temp.clear()

            # 加載數據到緩衝區

            try:
                # 計算增長率
                increase = (row_dic['sum(AUM2)'] - row_dic['sum(AUM1)']) / row_dic['sum(AUM1)']
            except Exception:
                increase = 0

            # 計算開戶時長(月份數) 若無則視為6個月以上
            if row_dic['ACCOUNT_AGE'] is None:
                row_dic['ACCOUNT_AGE'] = 7

            # 最後交易日期
            ltd = row_dic['LAST_TR_DATE']
            if ltd is not None:
                try:
                    ltd = datetime.datetime.strptime(ltd, '%Y-%m-%d')
                except Exception:
開發者ID:summer-apple,項目名稱:spark,代碼行數:70,代碼來源:band_card.py

示例2: __init__

# 需要導入模塊: from pyspark import SQLContext [as 別名]
# 或者: from pyspark.SQLContext import clearCache [as 別名]
class Credit:
    def __init__(self):
        self.conf = (SparkConf()
                     .setAppName("CREDIT")
                     .set("spark.cores.max", "2")
                     .set('spark.executor.extraClassPath', '/usr/local/env/lib/mysql-connector-java-5.1.38-bin.jar'))
        self.sc = SparkContext(conf=self.conf)
        self.sqlctx = SQLContext(self.sc)
        self.mysql_helper = MySQLHelper('core', host='10.9.29.212')
        self.base = 'hdfs://master:9000/gmc/'

    def load_from_mysql(self, table, database='core'):
        url = "jdbc:mysql://10.9.29.212:3306/%s?user=root&characterEncoding=UTF-8" % database
        df = self.sqlctx.read.format("jdbc").options(url=url, dbtable=table, driver="com.mysql.jdbc.Driver").load()
        return df

    def sql_operate(self, sql, rdd, once_size=1000):
        temp = []
        for row in rdd.collect():
            # print(row)
            if len(temp) >= once_size:
                self.mysql_helper.executemany(sql, temp)
                temp.clear()
            temp.append(row)

        if len(temp) != 0:
            self.mysql_helper.executemany(sql, temp)
            temp.clear()

    def prepare_fpgrowth_data(self):
        tran_df = self.load_from_mysql('t_CMMS_CREDIT_TRAN').filter("BILL_AMTFLAG = '+'").select('ACCTNBR',
                                                                                                 'MER_CAT_CD') \
            .filter("MER_CAT_CD != 0").filter("MER_CAT_CD != 6013")

        result = tran_df.map(lambda x: (str(int(x['ACCTNBR'])), [str(int(x['MER_CAT_CD'])), ])).groupByKey()

        def m(x):
            k = x[0]
            l = list(x[1])

            v = set()
            for i in l:
                v.add(i[0])

            return set(v)

        result = result.map(m)
        for i in result.take(10):
            print(i)

        model = FPGrowth.train(result, minSupport=0.05, numPartitions=10)
        result = model.freqItemsets().collect()
        for r in result:
            print(r)

    def cycle_credit(self):
        '''
        信用卡聚類數據預處理
        :return:
        '''

        print('---------------------------信用卡-Start--------------------------')
        # 交易流水
        credit_tran_df = self.load_from_mysql('t_CMMS_CREDIT_TRAN').select('ACCTNBR', 'MONTH_NBR', 'BILL_AMT',
                                                                           'BILL_AMTFLAG').filter(
            "BILL_AMTFLAG ='-'").cache()

        # 卡賬戶信息
        credit_acct_df = self.load_from_mysql('ACCT_D').select('ACCTNBR', 'MONTH_NBR', 'STM_MINDUE')

        # 還款計算
        return_amt = credit_tran_df.groupBy('ACCTNBR', 'MONTH_NBR').sum('BILL_AMT')
        return_amt = return_amt.select('ACCTNBR', 'MONTH_NBR', return_amt['sum(BILL_AMT)'].alias('RETURNED'))

        # 去除0最低還款額,即未消費的賬單月
        join = credit_acct_df.join(return_amt, ['ACCTNBR', 'MONTH_NBR'], 'outer').filter('STM_MINDUE != 0')

        # 清除緩存
        self.sqlctx.clearCache()

        def which_cycle_type(line):
            mindue = line['STM_MINDUE']
            returned = line['RETURNED']

            '''
            0:normal,all returned
            1:cycle credit
            2:overdue,don't return money
            '''
            if mindue is not None and returned is None:
                flag = 2
            elif returned >= mindue * 10:
                flag = 0
            elif returned > mindue and returned < mindue * 10:
                flag = 1
            else:
                flag = 9

            return Row(ACCTNBR=int(line['ACCTNBR']), MONTH_NBR=line['MONTH_NBR'], DUE_FLAG=flag,
                       STM_MINDUE=line['STM_MINDUE'])
#.........這裏部分代碼省略.........
開發者ID:summer-apple,項目名稱:spark,代碼行數:103,代碼來源:credit.py


注:本文中的pyspark.SQLContext.clearCache方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。