本文整理匯總了Python中pyspark.SQLContext.dropTempTable方法的典型用法代碼示例。如果您正苦於以下問題:Python SQLContext.dropTempTable方法的具體用法?Python SQLContext.dropTempTable怎麽用?Python SQLContext.dropTempTable使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pyspark.SQLContext
的用法示例。
在下文中一共展示了SQLContext.dropTempTable方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: double
# 需要導入模塊: from pyspark import SQLContext [as 別名]
# 或者: from pyspark.SQLContext import dropTempTable [as 別名]
|-- Dropoff_longitude: double (nullable = true)
|-- Dropoff_latitude: double (nullable = true)
|-- Passenger_count: integer (nullable = true)
|-- Total_amount: double (nullable = true)
|-- Trip_type: integer (nullable = true)
"""
tripsData.registerTempTable("tripsData")
# The trips whose pickup time is between 2am -- 6am
GIS_Points = sqlContext.sql("select lpep_pickup_datetime, Pickup_longitude, Pickup_latitude from tripsData " +
"where lpep_pickup_datetime like '%2015-__-__ 02:%' "
"or lpep_pickup_datetime like '%2015-__-__ 03:%' "
"or lpep_pickup_datetime like '%2015-__-__ 04:%' "
"or lpep_pickup_datetime like '%2015-__-__ 05:%' "
).cache()
sqlContext.dropTempTable("tripsData")
# This is a list
shapeRecs = nyc_shapefile.shapeRecords()
def point_in_poly(x, y, poly):
n = len(poly)
inside = False
p1x, p1y = poly[0]
for i in range(n + 1):
p2x, p2y = poly[i % n]
if y > min(p1y, p2y):
if y <= max(p1y, p2y):
if x <= max(p1x, p2x):
示例2: __init__
# 需要導入模塊: from pyspark import SQLContext [as 別名]
# 或者: from pyspark.SQLContext import dropTempTable [as 別名]
class DataHandler:
def __init__(self):
self.conf = (SparkConf()
.setAppName("BandCard")
.set("spark.cores.max", "2")
.set('spark.executor.extraClassPath', '/usr/local/env/lib/mysql-connector-java-5.1.38-bin.jar'))
self.sc = SparkContext(conf=self.conf)
self.sqlctx = SQLContext(self.sc)
self.mysql_helper = MySQLHelper('core', host='10.9.29.212')
def load_from_mysql(self, table, database='core'):
url = "jdbc:mysql://10.9.29.212:3306/%s?user=root&characterEncoding=UTF-8" % database
df = self.sqlctx.read.format("jdbc").options(url=url, dbtable=table, driver="com.mysql.jdbc.Driver").load()
return df
def prepare_life_cycle(self, year, season):
'''
準備生命周期數據
從t_CMMS_ASSLIB_ASSET中獲取每日AUM數據
prepare data
saum1 (last season sum aum)
saum2 (current season sum aum)
aum_now
account_age (months)
last_tr_date (days)
:param year:
:param season: 1,2,3,4
:return:
'''
# 計算月份
print('----------------------生命周期-Start----------------------')
print('開始準備生命周期數據...')
print('開始計算月份')
if season == 1:
# date1 當前季度月份
date1 = [str(year) + '-01', str(year) + '-02', str(year) + '-03']
# date2 上一季月份
date2 = [str(year - 1) + '-10', str(year - 1) + '-11', str(year - 1) + '-12']
elif season == 4:
date1 = [str(year) + '-10', str(year) + '-11', str(year) + '-12']
date2 = [str(year) + '-07', str(year) + '-08', str(year) + '-9']
else:
date1 = [str(year) + '-0' + str(3 * season - 2), str(year) + '-0' + str(3 * season - 1),
str(year) + '-0' + str(3 * season)]
date2 = [str(year) + '-0' + str(3 * season - 5), str(year) + '-0' + str(3 * season - 4),
str(year) + '-0' + str(3 * season - 3)]
print('當前季度月份 new:', date1)
print('上一季度月份 old:', date2)
# 加載AUM表
aum = self.load_from_mysql('t_CMMS_ASSLIB_ASSET_c').cache()
# 拚接每季度三個月斷數據
season_new = aum.filter(aum.STAT_DAT == date1[0]).unionAll(aum.filter(aum.STAT_DAT == date1[1])).unionAll(
aum.filter(aum.STAT_DAT == date1[2]))
season_old = aum.filter(aum.STAT_DAT == date2[0]).unionAll(aum.filter(aum.STAT_DAT == date2[1])).unionAll(
aum.filter(aum.STAT_DAT == date2[2]))
# 計算每季度AUM
aum_season_old = season_old.select('CUST_NO', season_old.AUM.alias('AUM1')).groupBy('CUST_NO').sum('AUM1')
aum_season_new = season_new.select('CUST_NO', season_new.AUM.alias('AUM2')).groupBy('CUST_NO').sum('AUM2')
# 兩個季度進行外聯接
'''
+-----------+---------+---------+
| CUST_NO|sum(AUM2)|sum(AUM1)|
+-----------+---------+---------+
|81005329523| null|294844.59|
|81011793167| null| 365.20|
|81015319088| null| 9640.96|
+-----------+---------+---------+
'''
union_season = aum_season_old.join(aum_season_new, 'CUST_NO', 'outer')
# 篩選當前AUM
temp_result = aum.select('CUST_NO', 'AUM', 'STAT_DAT').groupBy('CUST_NO', 'STAT_DAT').sum('AUM').sort(
'CUST_NO').sort(aum.STAT_DAT.desc())
temp_result.select('CUST_NO', temp_result['sum(AUM)'].alias('AUM'), 'STAT_DAT').registerTempTable('group_in')
aum_now_sql = "select CUST_NO,first(AUM) as AUM_NOW from group_in group by CUST_NO"
aum_now = self.sqlctx.sql(aum_now_sql)
# 清除緩存表
self.sqlctx.dropTempTable('group_in')
# 聯合
union_season_aumnow = union_season.join(aum_now, 'CUST_NO', 'outer')
#.........這裏部分代碼省略.........