This page collects typical usage examples of the HiveContext.hql method from Python's pyspark.sql module. If you are unsure what HiveContext.hql does, how to call it, or what real-world usage looks like, the curated code samples below should help. You can also read more about the enclosing class, pyspark.sql.HiveContext.
The following shows 4 code examples of HiveContext.hql, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
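As a quick orientation before the examples (not one of the collected samples): HiveContext.hql takes a HiveQL string, runs it against the tables visible to the HiveContext, and returns a SchemaRDD. Below is a minimal sketch of that pattern, assuming a Spark 1.x installation with Hive support and an existing Hive table named src; the table and app names are illustrative only, and note that hql was later deprecated in favor of HiveContext.sql.
from pyspark import SparkContext
from pyspark.sql import HiveContext

sc = SparkContext("local", "HqlSketch")
hiveCtx = HiveContext(sc)

# Run a HiveQL query; the result is a SchemaRDD that supports normal RDD operations.
rows = hiveCtx.hql("SELECT key, value FROM src LIMIT 10")
for row in rows.collect():
    print "key=" + str(row.key) + " value=" + str(row.value)
sc.stop()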
Example 1: len
# Required import: from pyspark.sql import HiveContext [as alias]
# Or: from pyspark.sql.HiveContext import hql [as alias]
# A simple Hive demo. If you do not have a table to load from, run MakeHiveTable.py first.
from pyspark import SparkContext
from pyspark.sql import HiveContext
import json
import sys
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print "Error usage: LoadHive [sparkmaster] [inputtable]"
        sys.exit(-1)
    master = sys.argv[1]
    inputTable = sys.argv[2]
    sc = SparkContext(master, "LoadHive")
    hiveCtx = HiveContext(sc)
    # Query hive
    input = hiveCtx.hql("FROM " + inputTable + " SELECT key, value")
    data = input.map(lambda x: x['key'] * x['key'])
    result = data.collect()
    for element in result:
        print "Got data " + str(element)
    sc.stop()
    print "Done!"
Example 2: SparkConf
# Required import: from pyspark.sql import HiveContext [as alias]
# Or: from pyspark.sql.HiveContext import hql [as alias]
# A simple demo for working with SparkSQL and Tweets
from pyspark import SparkContext, SparkConf
from pyspark.sql import HiveContext, Row, IntegerType
import json
import sys
if __name__ == "__main__":
    inputFile = sys.argv[1]
    conf = SparkConf().setAppName("SparkSQLTwitter")
    sc = SparkContext(conf=conf)
    hiveCtx = HiveContext(sc)
    print "Loading tweets from " + inputFile
    input = hiveCtx.jsonFile(inputFile)
    input.registerTempTable("tweets")
    topTweets = hiveCtx.hql("SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10")
    print topTweets.collect()
    topTweetText = topTweets.map(lambda row: row.text)
    print topTweetText.collect()
    # Make a happy person row
    happyPeopleRDD = sc.parallelize([Row(name="holden", favouriteBeverage="coffee")])
    happyPeopleSchemaRDD = hiveCtx.inferSchema(happyPeopleRDD)
    happyPeopleSchemaRDD.registerTempTable("happy_people")
    # Make a UDF to tell us how long some text is
    hiveCtx.registerFunction("strLenPython", lambda x: len(x), IntegerType())
    lengthSchemaRDD = hiveCtx.hql("SELECT strLenPython('text') FROM tweets LIMIT 10")
    print lengthSchemaRDD.collect()
    sc.stop()
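Example 2 registers the happy_people temp table but never queries it. A short follow-up query, as a sketch that continues the example (it reuses the same hiveCtx and table names and would sit before sc.stop() in the script), might look like this:
# Query the temp table registered above (continues Example 2).
happyPeople = hiveCtx.hql("SELECT name, favouriteBeverage FROM happy_people")
print happyPeople.collect()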
Example 3: SparkConf
# Required import: from pyspark.sql import HiveContext [as alias]
# Or: from pyspark.sql.HiveContext import hql [as alias]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#########################################################################
# File Name: loadHive.py
# Author: lpqiu
# mail: [email protected]
# Created Time: 2014-11-30 (Sunday) 21:31:06
#########################################################################
from pyspark import SparkConf, SparkContext, StorageLevel
from pyspark.sql import HiveContext, SQLContext

conf = SparkConf().setMaster("local").setAppName("My App")
sc = SparkContext(conf=conf)
hiveCtx = HiveContext(sc)
rows = hiveCtx.hql("SELECT key, value FROM src")
keys = rows.map(lambda row: row["key"])

# Parquet load example
sqlCtx = SQLContext(sc)
rows = sqlCtx.parquetFile("people.parquet")
names = rows.map(lambda row: row["name"])
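The Parquet rows loaded in Example 3 can also be registered as a temp table and queried with SQL. A brief continuation, as a sketch under the same Spark 1.x assumptions (it assumes people.parquet has name and age columns, which is not shown in the example):
# Register the loaded Parquet data as a temp table and query it.
rows.registerTempTable("people")
adults = sqlCtx.sql("SELECT name FROM people WHERE age >= 18")
print adults.collect()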
Example 4: len
# Required import: from pyspark.sql import HiveContext [as alias]
# Or: from pyspark.sql.HiveContext import hql [as alias]
# Creates a Hive table and loads an input file into it.
# For input you can use examples/src/main/resources/kv1.txt from the Spark
# distribution.
from pyspark import SparkContext
from pyspark.sql import HiveContext
import json
import sys
if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "Error usage: LoadHive [sparkmaster] [inputFile] [inputtable]"
        sys.exit(-1)
    master = sys.argv[1]
    inputFile = sys.argv[2]
    inputTable = sys.argv[3]
    sc = SparkContext(master, "LoadHive")
    hiveCtx = HiveContext(sc)
    # Load some data into hive
    hiveCtx.hql(
        "CREATE TABLE IF NOT EXISTS " +
        inputTable +
        " (key INT, value STRING)")
    hiveCtx.hql(
        "LOAD DATA LOCAL INPATH '" + inputFile + "' INTO TABLE " + inputTable)