This article collects typical usage examples of the Java method org.apache.spark.sql.Dataset.createOrReplaceTempView. If you are wondering what Dataset.createOrReplaceTempView does, how to call it, or what it looks like in real code, the curated method examples below should help. You can also read further about its enclosing class, org.apache.spark.sql.Dataset.
Below are 7 code examples of Dataset.createOrReplaceTempView, ordered by popularity by default.
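Before the collected examples, here is a minimal, self-contained sketch of the basic pattern: register a Dataset as a temporary view, then query it by name with Spark SQL. The SparkSession settings, view name, and Person bean are illustrative assumptions, not taken from any of the examples below.
import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class TempViewSketch {

    // Simple bean used only for this sketch.
    public static class Person implements java.io.Serializable {
        private String name;
        private int age;
        public Person() {}
        public Person(String name, int age) { this.name = name; this.age = age; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public int getAge() { return age; }
        public void setAge(int age) { this.age = age; }
    }

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("createOrReplaceTempView sketch")
                .getOrCreate();

        // Build a small Dataset from an in-memory list of beans.
        Dataset<Row> people = spark.createDataFrame(
                Arrays.asList(new Person("alice", 34), new Person("bob", 29)),
                Person.class);

        // Register the Dataset as a temporary view scoped to this SparkSession.
        people.createOrReplaceTempView("people");

        // The view can now be referenced by name from Spark SQL.
        spark.sql("select name from people where age > 30").show();

        spark.stop();
    }
}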
Example 1: getPayloadFromCsv
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
private static Dataset<Row> getPayloadFromCsv( final SparkSession sparkSession, JdbcIntegrationConfig config ) {
// String csvPath = Resources.getResource( "dispatch_persons.csv" ).getPath();
String sql = "(select * from dbo.Dispatch_Persons where Dis_id IN "
+ "( select distinct (Dis_Id) from Dispatch where CFS_DateTimeJanet > DateADD(d, -90, GETDATE()) ) ) Dispatch_Persons";
logger.info( "SQL Query for persons: {}", sql );
Dataset<Row> payload = sparkSession
.read()
.format( "jdbc" )
.option( "url", config.getUrl() )
.option( "dbtable", sql )
.option( "password", config.getDbPassword() )
.option( "user", config.getDbUser() )
.option( "driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver" )
.load();
payload.createOrReplaceTempView( "Dispatch_Persons" );
// .filter( col( "Timercvd" ).geq( DateTime.now().minusDays( 2 ) ) )
// .filter( col( "Type" ).notEqual( "2" ) );
return payload;
}
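As a hypothetical follow-up (not part of the original method), the view registered above could then be queried through the same SparkSession. The column name Dis_id is taken from the SQL in the example; the rest of the schema is assumed.
// Assumes the sparkSession used in getPayloadFromCsv is still in scope.
Dataset<Row> dispatchCounts = sparkSession.sql(
        "select Dis_id, count(*) as person_count from Dispatch_Persons group by Dis_id");
dispatchCounts.show();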
Example 2: processAndRegisterTempTable
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
private void processAndRegisterTempTable(Dataset<Row> df, RootStatement rootStatement, String tableAlias, String logText, boolean debug) {
boolean dfPersisted = false;
long tableReferenceCount = rootStatement.getTableReferenceCount().getCount(tableAlias);
if (tableReferenceCount > 1) {
df = df.persist(StorageLevel.MEMORY_AND_DISK_SER());
dfPersisted = true;
logger.info(String.format("Persist table %s because it is referenced %s times", tableAlias, tableReferenceCount));
} else {
logger.info(String.format("Do not persist table %s because it is referenced %s times", tableAlias, tableReferenceCount));
}
df.createOrReplaceTempView(tableAlias);
logger.info(String.format("Registered temp view %s for query: %s", tableAlias, logText));
if (debug) {
if (!dfPersisted) {
df = df.persist(StorageLevel.MEMORY_AND_DISK_SER());
}
// TODO save debug info/data
}
}
Example 3: main
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
public static void main(String[] args) {
System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
SparkSession sparkSession = SparkSession.builder()
.master("local")
.appName("CSV Read Example")
.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse")
.getOrCreate();
Dataset<Row> csv = sparkSession.read().format("com.databricks.spark.csv").option("header","true")
.load("C:\\Users\\sgulati\\Documents\\my_docs\\book\\testdata\\emp.csv");
csv.createOrReplaceTempView("test");
Dataset<Row> sql = sparkSession.sql("select * from test");
sql.collectAsList();
}
Author: PacktPublishing; Project: Apache-Spark-2x-for-Java-Developers; Lines: 16; Source file: SparkSessionHeloWorld.java
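A side note on this example: the external com.databricks.spark.csv package was needed before Spark 2.0; on Spark 2.x the built-in CSV reader can be used instead. A sketch of the equivalent read (same file path and view name as in the example):
// Assumes the sparkSession from the example above is in scope.
Dataset<Row> csv = sparkSession.read()
        .option("header", "true")
        .csv("C:\\Users\\sgulati\\Documents\\my_docs\\book\\testdata\\emp.csv");
csv.createOrReplaceTempView("test");
sparkSession.sql("select * from test").show();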
Example 4: load_ntriples
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
public void load_ntriples() {
String ds = hdfs_input_directory;
Dataset<Row> triple_table_file = spark.read().text(ds);
String triple_regex = build_triple_regex();
Dataset<Row> triple_table = triple_table_file.select(
functions.regexp_extract(functions.col("value"), triple_regex, 1).alias(this.column_name_subject),
functions.regexp_extract(functions.col("value"), triple_regex, 2).alias(this.column_name_predicate),
functions.regexp_extract(functions.col("value"), triple_regex, 3).alias(this.column_name_object));
triple_table.createOrReplaceTempView(name_tripletable);
logger.info("Created tripletable");
}
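The helper build_triple_regex() is not shown in this listing. Purely as an assumption about its intent, a minimal pattern for simple N-Triples lines, with three capture groups matching the indexes 1, 2, and 3 passed to regexp_extract above, could look like this:
// Hypothetical sketch only; the project's actual build_triple_regex() is not shown here.
private String build_triple_regex() {
    // group 1 = subject, group 2 = predicate, group 3 = object, trailing " ." terminator
    return "^\\s*(\\S+)\\s+(\\S+)\\s+(.+?)\\s*\\.\\s*$";
}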
Example 5: main
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
public static void main(String[] args) {
//Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
System.setProperty("hadoop.home.dir", "E:\\hadoop");
//Build a Spark Session
SparkSession sparkSession = SparkSession
.builder()
.master("local")
.config("spark.sql.warehouse.dir","file:///E:/hadoop/warehouse")
.appName("EdgeBuilder")
.getOrCreate();
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.WARN);
// Read the CSV data
Dataset<Row> emp_ds = sparkSession.read()
.format("com.databricks.spark.csv")
.option("header", "true")
.option("inferSchema", "true")
.load("src/main/resources/employee.txt");
UDF2 calcDays=new CalcDaysUDF();
//Registering the UDFs in Spark Session created above
sparkSession.udf().register("calcDays", calcDays, DataTypes.LongType);
emp_ds.createOrReplaceTempView("emp_ds");
emp_ds.printSchema();
emp_ds.show();
sparkSession.sql("select calcDays(hiredate,'dd-MM-yyyy') from emp_ds").show();
//Instantiate UDAF
AverageUDAF calcAvg= new AverageUDAF();
//Register UDAF to SparkSession
sparkSession.udf().register("calAvg", calcAvg);
//Use UDAF
sparkSession.sql("select deptno,calAvg(salary) from emp_ds group by deptno ").show();
//
TypeSafeUDAF typeSafeUDAF=new TypeSafeUDAF();
Dataset<Employee> emf = emp_ds.as(Encoders.bean(Employee.class));
emf.printSchema();
emf.show();
TypedColumn<Employee, Double> averageSalary = typeSafeUDAF.toColumn().name("averageTypeSafe");
Dataset<Double> result = emf.select(averageSalary);
result.show();
}
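Neither CalcDaysUDF nor AverageUDAF is included in this listing. As an assumption about what the UDF2 might do, given the call calcDays(hiredate,'dd-MM-yyyy') and the DataTypes.LongType registration above, a minimal sketch:
import org.apache.spark.sql.api.java.UDF2;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;

// Hypothetical sketch; the real CalcDaysUDF in the example is not shown in this listing.
public class CalcDaysUDF implements UDF2<String, String, Long> {
    @Override
    public Long call(String dateStr, String pattern) {
        // Parse the date with the supplied pattern and return the number of days until today.
        LocalDate hired = LocalDate.parse(dateStr, DateTimeFormatter.ofPattern(pattern));
        return ChronoUnit.DAYS.between(hired, LocalDate.now());
    }
}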
Example 6: main
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
public static void main(String[] args) throws StreamingQueryException {
System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("structured Streaming Example")
.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();
Dataset<Row> inStream = sparkSession.readStream().format("socket").option("host", "10.204.136.223")
.option("port", 9999).load();
Dataset<FlightDetails> dsFlightDetails = inStream.as(Encoders.STRING()).map(x -> {
ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(x, FlightDetails.class);
}, Encoders.bean(FlightDetails.class));
dsFlightDetails.createOrReplaceTempView("flight_details");
Dataset<Row> avdFlightDetails = sparkSession.sql("select flightId, avg(temperature) from flight_details group by flightId");
StreamingQuery query = avdFlightDetails.writeStream()
.outputMode("complete")
.format("console")
.start();
query.awaitTermination();
}
Author: PacktPublishing; Project: Apache-Spark-2x-for-Java-Developers; Lines: 29; Source file: StructuredStreamingExample.java
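The FlightDetails bean deserialized from the socket stream is not included in this listing. A minimal bean consistent with the fields used in the SQL above (flightId, temperature) might look like the following sketch; the real class may carry more fields:
// Hypothetical minimal bean matching the columns referenced in the streaming query.
public class FlightDetails implements java.io.Serializable {
    private String flightId;
    private double temperature;
    public FlightDetails() {}
    public String getFlightId() { return flightId; }
    public void setFlightId(String flightId) { this.flightId = flightId; }
    public double getTemperature() { return temperature; }
    public void setTemperature(double temperature) { this.temperature = temperature; }
}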
Example 7: prepareRDDAndWriteToCassandra
import org.apache.spark.sql.Dataset; // import the package/class that the method depends on
private void prepareRDDAndWriteToCassandra(String localFilePath) throws QueryExecutionException {
SparkConf conf = new SparkConf();
conf.setAppName("file-loader-poc");
conf.setMaster("local[*]");
SparkContext sparkContext = new SparkContext(conf);
JavaRDD<VideoViewEvent> videoEventRDD = prepareVideoRDD(localFilePath, sparkContext);
SparkSession sparkSession = SparkSession.builder().getOrCreate();
SQLContext sqlContext = new SQLContext(sparkSession);
Dataset<Row> videoEventDF = sqlContext.createDataFrame(videoEventRDD, VideoViewEvent.class);
videoEventDF.createOrReplaceTempView("videoEventTempView");
String videoViewCountQuery = "select videoId, viewDurationInSeconds, count(*)"
+ " as view_counts from videoEventTempView group by 1, 2";
List<Row> collectAsList = sqlContext.sql(videoViewCountQuery).collectAsList();
// printRows(collectAsList);
// Push directly to Cassandra
String tableName = "video_view_count";
Connection<CassandraDBContext> connection = new CassandraConnection("localhost", 9042);
writeVideoViewCountResultToCassandra(collectAsList, connection, tableName);
tableName = "user_view_count";
String userViewCountQuery = "select userId, viewDurationInSeconds, count(*) as"
+ " view_counts from videoEventTempView group by 1, 2";
collectAsList = sqlContext.sql(userViewCountQuery).collectAsList();
writeUserViewCountResultToCassandra(collectAsList, tableName, connection);
saveAsParquetFiles(sqlContext, videoViewCountQuery);
}