This article collects typical usage examples of the Java class org.apache.spark.sql.hive.HiveContext. If you are wondering what the HiveContext class is for, how to use it, or where to find examples of it, the curated class code examples below may help.
The HiveContext class belongs to the org.apache.spark.sql.hive package. Fifteen code examples of the HiveContext class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: Spark
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
/**
 * Initializes a Spark connection. Use it afterwards for execution of Spark
 * SQL queries.
 *
 * @param appName
 *            the name of the app that will be used with this Spark
 *            connection
 * @param database
 *            name of the database that will be used with this Spark
 *            connection
 */
public Spark(String appName, String database) {
    // TODO check what will happen if the same app name is already in use
    this.sparkConfiguration = new SparkConf().setAppName(appName);
    this.javaContext = new JavaSparkContext(sparkConfiguration);
    this.hiveContext = new HiveContext(javaContext);
    // TODO check what kind of exception can be thrown here if there is a
    // problem with the Spark connection
    this.hiveContext.sql(String.format("CREATE DATABASE %s", database));
    // TODO check what kind of exception is thrown if the database already exists
    // use the created database
    this.hiveContext.sql(String.format("USE %s", database));
}
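A minimal usage sketch of this constructor (the app and database names are hypothetical). One way to resolve the TODO about a pre-existing database is Hive's IF NOT EXISTS clause, which makes the statement a no-op when the database already exists:

// Hypothetical usage; "hivecontext-demo" and "demo_db" are example names.
Spark spark = new Spark("hivecontext-demo", "demo_db");

// Possible fix for the "database already exists" TODO (a sketch, not the author's code):
// this.hiveContext.sql(String.format("CREATE DATABASE IF NOT EXISTS %s", database));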
Example 2: Spark
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
/**
 * Initializes a Spark connection. Use it afterwards for execution of Spark
 * SQL queries.
 *
 * @param appName
 *            the name of the app that will be used with this Spark
 *            connection
 * @param database
 *            name of the database that will be used with this Spark
 *            connection
 */
public Spark(String appName, String database) {
    // TODO check what will happen if the same app name is already in use
    this.sparkConfiguration = new SparkConf().setAppName(appName).set("spark.io.compression.codec", "snappy");
    this.javaContext = new JavaSparkContext(sparkConfiguration);
    this.hiveContext = new HiveContext(javaContext);
    // use the created database
    this.hiveContext.sql(String.format("USE %s", database));
    configureSparkContext();
    cacheTable();
}
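The snappy codec hard-coded above can equivalently be supplied at submit time instead of in code (a sketch; the class and jar names are placeholders):

spark-submit --conf spark.io.compression.codec=snappy --class com.example.Main app.jar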
Example 3: loadSpecialFormat
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
HiveContext sqlContext = sparkDDFManager.getHiveContext();
DataFrame jdf = null;
switch (format) {
case JSON:
jdf = sqlContext.jsonFile(fileURI.toString());
break;
case PQT:
jdf = sqlContext.parquetFile(fileURI.toString());
break;
default:
throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
}
DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
null, SparkUtils.schemaFromDataFrame(df));
    return flatten ? ddf.getFlattenedDDF() : ddf;
}
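For reference, the underlying Spark 1.x calls made through the DDF wrapper above look like this (a sketch; jsonFile and parquetFile are the pre-DataFrameReader SQLContext methods used in the example, and the paths are placeholders):

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

// Assumes an existing JavaSparkContext `javaContext`; the paths are examples.
HiveContext sqlContext = new HiveContext(javaContext);
DataFrame json = sqlContext.jsonFile("hdfs:///data/events.json");      // schema inferred from the JSON records
DataFrame pqt = sqlContext.parquetFile("hdfs:///data/events.parquet"); // schema read from the Parquet footer
json.registerTempTable("events_json");
sqlContext.sql("SELECT COUNT(*) FROM events_json").show();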
Example 4: loadFromJDBC
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
SparkDDFManager sparkDDFManager = (SparkDDFManager)mDDFManager;
HiveContext sqlContext = sparkDDFManager.getHiveContext();
JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials)dataSource.getDataSourceCredentials();
String fullURL = dataSource.getDataSourceUri().getUri().toString();
if (cred.getUsername() != null && !cred.getUsername().equals("")) {
fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
}
Map<String, String> options = new HashMap<String, String>();
options.put("url", fullURL);
options.put("dbtable", dataSource.getDbTable());
DataFrame df = sqlContext.load("jdbc", options);
DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[]{DataFrame.class},
null, SparkUtils.schemaFromDataFrame(df));
// TODO?
ddf.getRepresentationHandler().get(RDD.class, Row.class);
ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
return ddf;
}
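On Spark 1.4 and later, the same load can be expressed with the DataFrameReader API instead of the older sqlContext.load(source, options) form (a sketch; the URL, table, and credentials are placeholders, and passing credentials as reader options avoids embedding them in the URL as the example does):

import org.apache.spark.sql.DataFrame;

// Equivalent sketch with DataFrameReader (Spark 1.4+); all values are examples.
DataFrame df = sqlContext.read()
    .format("jdbc")
    .option("url", "jdbc:mysql://db-host:3306/sales")
    .option("dbtable", "orders")
    .option("user", "reporting")   // credentials as options rather than in the URL
    .option("password", "secret")
    .load();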
Example 5: main
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaSparkSqlBench <workload_name> <sql_script>");
        System.exit(1);
    }
    String workload_name = args[0];
    String sql_script = args[1];

    SparkConf sparkConf = new SparkConf().setAppName(workload_name);
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    HiveContext hc = new HiveContext(ctx.sc());

    // Read the whole SQL script; close the reader even on failure.
    StringBuilder contents = new StringBuilder();
    try (FileReader in = new FileReader(sql_script)) {
        char[] buffer = new char[40960];
        int read;
        while ((read = in.read(buffer)) >= 0) {
            contents.append(buffer, 0, read);
        }
    }

    // Execute the statements one by one, skipping empty fragments.
    for (String s : contents.toString().split(";")) {
        if (!s.trim().isEmpty()) {
            hc.sql(s);
        }
    }
    ctx.stop();
}
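A hypothetical bench.sql illustrating the expected input: statements separated by semicolons, with the empty fragments produced by the split being skipped:

-- bench.sql (hypothetical input file)
CREATE TABLE IF NOT EXISTS uservisits (ip STRING, url STRING);
SELECT COUNT(*) FROM uservisits;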
Example 6: process
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
@Override
public void process(Exchange exchange) throws Exception {
HiveContext hiveContext = resolveHiveContext();
String sql = exchange.getIn().getBody(String.class);
DataFrame resultFrame = hiveContext.sql(sql);
exchange.getIn().setBody(getEndpoint().isCollect() ? resultFrame.collectAsList() : resultFrame.count());
}
Example 7: resolveHiveContext
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
protected HiveContext resolveHiveContext() {
    Set<HiveContext> hiveContexts = getEndpoint().getComponent().getCamelContext().getRegistry().findByType(HiveContext.class);
    if (hiveContexts.size() == 1) {
        return hiveContexts.iterator().next();
    }
    // no unique HiveContext bound in the registry
    return null;
}
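resolveHiveContext only succeeds when exactly one HiveContext is bound in the Camel registry. A minimal sketch of such a binding (Camel 2.x API; the bean name and the pre-existing hiveContext instance are assumptions):

import org.apache.camel.impl.DefaultCamelContext;
import org.apache.camel.impl.SimpleRegistry;

// Bind a single HiveContext so findByType(HiveContext.class) yields exactly one hit.
SimpleRegistry registry = new SimpleRegistry();
registry.put("hiveContext", hiveContext); // assumes an existing HiveContext instance
DefaultCamelContext camelContext = new DefaultCamelContext(registry);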
Example 8: initialize
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
this.mHiveContext = new HiveContext(this.mSparkContext);
String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);
// register SparkSQL UDFs
this.registerUDFs();
this.mDataSourceManager = new SparkDataSourceManager(this);
}
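Because initialize reads the columnar-storage settings through System.getProperty, they can be overridden before the manager is created (the batch size shown is an example value):

// Override the defaults that initialize() reads via System.getProperty(...).
System.setProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
System.setProperty("spark.sql.inMemoryColumnarStorage.batchSize", "10000"); // example value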
Example 9: isTable
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
public boolean isTable() {
    HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
    for (String table : hiveContext.tableNames()) {
        if (table.equals(this.getTableName())) {
            return true;
        }
    }
    return false;
}
Example 10: whenHiveContextIsSelectedInConfiguration
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
@Test
public void whenHiveContextIsSelectedInConfiguration(){
Properties properties = new Properties();
properties.put(AttributteNames.CT_HIVE_CONTEXT,"YES");
sqlContext.loadConfiguration(properties);
assertThat("When HiveContext is selected, the connector should be an instance of HiveContext", sqlContext.getConnector(), instanceOf(HiveContext.class));
}
Example 11: main
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
public static void main(String[] args) throws IOException {
    SparkConf conf = new SparkConf().setAppName("SQLQueryBAM");
    JavaSparkContext sc = new JavaSparkContext(conf);
    SQLContext sqlContext = new HiveContext(sc.sc());

    Options options = new Options();
    Option opOpt = new Option("out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS.");
    Option queryOpt = new Option("query", true, "SQL query string.");
    Option baminOpt = new Option("in", true, "");
    options.addOption(opOpt);
    options.addOption(queryOpt);
    options.addOption(baminOpt);

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = null;
    try {
        cmd = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Parsing failed. Reason: " + exp.getMessage());
        System.exit(1); // cmd would be null below, so fail fast
    }

    String bwaOutDir = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
    String query = cmd.hasOption("query") ? cmd.getOptionValue("query") : null;
    String bamin = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;

    sc.hadoopConfiguration().setBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, true);

    // Read BAM/SAM from HDFS
    JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(bamin, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
    // Map to SAMRecord RDD
    JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());
    JavaRDD<MyAlignment> rdd = samRDD.map(bam -> new MyAlignment(bam.getReadName(), bam.getStart(), bam.getReferenceName(), bam.getReadLength(), new String(bam.getReadBases(), StandardCharsets.UTF_8), bam.getCigarString(), bam.getReadUnmappedFlag(), bam.getDuplicateReadFlag()));

    Dataset<Row> samDF = sqlContext.createDataFrame(rdd, MyAlignment.class);
    samDF.registerTempTable(tablename); // `tablename` is a field defined elsewhere in the original class

    if (query != null) {
        Dataset<Row> df2 = sqlContext.sql(query);
        df2.show(100, false);
        // Save as parquet file
        if (bwaOutDir != null)
            df2.write().parquet(bwaOutDir);
    } else {
        if (bwaOutDir != null)
            samDF.write().parquet(bwaOutDir);
    }
    sc.stop();
}
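A hypothetical invocation (the jar, paths, table, and column names are placeholders; the table in the query must match the snippet's tablename field, and the columns depend on MyAlignment's bean properties):

spark-submit --class SQLQueryBAM sql-query-bam.jar \
  -in hdfs:///data/sample.bam \
  -query "SELECT readName, start FROM records LIMIT 10" \
  -out hdfs:///out/alignments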
Example 12: beforeClass
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
@BeforeClass
public static void beforeClass() {
if (shouldRunHive) {
hiveContext = new HiveContext(sparkContext.sc());
}
}
Example 13: getHiveContext
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
public HiveContext getHiveContext() {
return mHiveContext;
}
Example 14: getHiveContext
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
private HiveContext getHiveContext() {
return ((SparkDDFManager) this.getManager()).getHiveContext();
}
Example 15: setup
import org.apache.spark.sql.hive.HiveContext; // import the package/class this example depends on
static void setup() throws VerdictException {
SparkContext sc = new SparkContext(new SparkConf().setAppName("Spark Aggregation Integration Tests"));
sqlContext = new HiveContext(sc);
vc = new VerdictSparkContext(sqlContext);
}