

Java Dataset.createOrReplaceTempView Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.Dataset.createOrReplaceTempView. If you are unsure what Dataset.createOrReplaceTempView does, or how and when to call it, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.sql.Dataset.


The following shows 7 code examples of Dataset.createOrReplaceTempView, sorted by popularity by default.
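
Before diving in, here is a minimal, self-contained sketch of the pattern every example below shares: load a Dataset&lt;Row&gt;, register it as a session-scoped temporary view with createOrReplaceTempView, then query it by name through Spark SQL. The file and view names are placeholders, not taken from the examples.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class TempViewSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("TempViewSketch")
                .getOrCreate();

        // Any tabular source works; the CSV path here is a placeholder.
        Dataset<Row> people = spark.read()
                .option("header", "true")
                .csv("people.csv");

        // Register the Dataset under a name; an existing view with the
        // same name is replaced. The view lives only for this session.
        people.createOrReplaceTempView("people");

        // The registered view can now be referenced from SQL.
        spark.sql("select count(*) from people").show();

        spark.stop();
    }
}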

Example 1: getPayloadFromCsv

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
// Despite the name, this reads from SQL Server over JDBC; the CSV source is commented out.
private static Dataset<Row> getPayloadFromCsv( final SparkSession sparkSession, JdbcIntegrationConfig config ) {

    //        String csvPath = Resources.getResource( "dispatch_persons.csv" ).getPath();

    // A parenthesized subquery with an alias can be passed as the JDBC "dbtable" option.
    String sql = "(select * from dbo.Dispatch_Persons where Dis_id IN "
            + "( select distinct (Dis_Id) from Dispatch where CFS_DateTimeJanet > DateADD(d, -90, GETDATE()) ) ) Dispatch_Persons";
    logger.info( "SQL Query for persons: {}", sql );
    Dataset<Row> payload = sparkSession
            .read()
            .format( "jdbc" )
            .option( "url", config.getUrl() )
            .option( "dbtable", sql )
            .option( "password", config.getDbPassword() )
            .option( "user", config.getDbUser() )
            .option( "driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver" )
            .load();
    // Register the payload so later stages can query it by name.
    payload.createOrReplaceTempView( "Dispatch_Persons" );
    //        .filter( col( "Timercvd" ).geq( DateTime.now().minusDays( 2 ) ) )
    //        .filter( col( "Type" ).notEqual( "2" ) );

    return payload;
}
 
Developer: dataloom, Project: integrations, Lines: 23, Source: DispatchPersonsFlight.java

Example 2: processAndRegisterTempTable

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
private void processAndRegisterTempTable(Dataset<Row> df, RootStatement rootStatement, String tableAlias, String logText, boolean debug) {
    boolean dfPersisted = false;
    long tableReferenceCount = rootStatement.getTableReferenceCount().getCount(tableAlias);

    // Persist only when the view will be read more than once, so its plan
    // is not recomputed for every downstream reference.
    if (tableReferenceCount > 1) {
        df = df.persist(StorageLevel.MEMORY_AND_DISK_SER());
        dfPersisted = true;
        logger.info(String.format("Persist table %s because it is referenced %s times", tableAlias, tableReferenceCount));
    } else {
        logger.info(String.format("Do not persist table %s because it is referenced %s times", tableAlias, tableReferenceCount));
    }

    df.createOrReplaceTempView(tableAlias);
    logger.info(String.format("Registered temp view %s for query: %s", tableAlias, logText));

    if (debug) {
        if (!dfPersisted) {
            df = df.persist(StorageLevel.MEMORY_AND_DISK_SER());
        }

        // TODO save debug info/data
    }
}
 
Developer: uber, Project: uberscriptquery, Lines: 24, Source: QueryEngine.java

Example 3: main

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
public static void main(String[] args) {
    // Required on Windows when Hadoop is not installed locally.
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkSession sparkSession = SparkSession.builder()
            .master("local")
            .appName("CSV Read Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse")
            .getOrCreate();

    Dataset<Row> csv = sparkSession.read().format("com.databricks.spark.csv")
            .option("header", "true")
            .load("C:\\Users\\sgulati\\Documents\\my_docs\\book\\testdata\\emp.csv");

    // Register the CSV data as a temp view and query it with SQL.
    csv.createOrReplaceTempView("test");
    Dataset<Row> sql = sparkSession.sql("select * from test");
    sql.collectAsList();
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 16, Source: SparkSessionHeloWorld.java
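
A side note on Example 3: com.databricks.spark.csv was the external CSV package for Spark 1.x; Spark 2.x ships an equivalent built-in reader. A drop-in alternative for the read above (same path, assuming a header row as in the example):

Dataset<Row> csv = sparkSession.read()
        .option("header", "true")
        .csv("C:\\Users\\sgulati\\Documents\\my_docs\\book\\testdata\\emp.csv");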

Example 4: load_ntriples

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
public void load_ntriples() {
    String ds = hdfs_input_directory;
    Dataset<Row> triple_table_file = spark.read().text(ds);

    String triple_regex = build_triple_regex();

    // Split each N-Triples line into subject, predicate, and object
    // using regex capture groups 1-3.
    Dataset<Row> triple_table = triple_table_file.select(
            functions.regexp_extract(functions.col("value"), triple_regex, 1).alias(this.column_name_subject),
            functions.regexp_extract(functions.col("value"), triple_regex, 2).alias(this.column_name_predicate),
            functions.regexp_extract(functions.col("value"), triple_regex, 3).alias(this.column_name_object));

    triple_table.createOrReplaceTempView(name_tripletable);
    logger.info("Created tripletable");
}
 
Developer: tf-dbis-uni-freiburg, Project: PRoST, Lines: 16, Source: TripleTableLoader.java
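
The helper build_triple_regex() is defined elsewhere in PRoST and is not shown here. Purely as an illustration, and not the project's actual pattern, a minimal N-Triples matcher with the three capture groups the select above expects might look like this:

// Hypothetical sketch, not PRoST's real regex: capture subject, predicate and
// object from a line of the form "<s> <p> <o> ." as groups 1, 2 and 3.
private static String build_triple_regex() {
    return "^\\s*(\\S+)\\s+(\\S+)\\s+(.+?)\\s*\\.\\s*$";
}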

Example 5: main

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
public static void main(String[] args) {
    // Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set.
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    // Build a Spark session.
    SparkSession sparkSession = SparkSession
            .builder()
            .master("local")
            .config("spark.sql.warehouse.dir", "file:///E:/hadoop/warehouse")
            .appName("EdgeBuilder")
            .getOrCreate();
    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.WARN);

    // Read the CSV data.
    Dataset<Row> emp_ds = sparkSession.read()
            .format("com.databricks.spark.csv")
            .option("header", "true")
            .option("inferSchema", "true")
            .load("src/main/resources/employee.txt");

    // Register the UDF in the Spark session created above.
    UDF2 calcDays = new CalcDaysUDF();
    sparkSession.udf().register("calcDays", calcDays, DataTypes.LongType);

    // Register the dataset as a temp view so the UDF can be used from SQL.
    emp_ds.createOrReplaceTempView("emp_ds");

    emp_ds.printSchema();
    emp_ds.show();

    sparkSession.sql("select calcDays(hiredate,'dd-MM-yyyy') from emp_ds").show();

    // Instantiate the UDAF and register it with the SparkSession.
    AverageUDAF calcAvg = new AverageUDAF();
    sparkSession.udf().register("calAvg", calcAvg);
    // Use the UDAF from SQL.
    sparkSession.sql("select deptno,calAvg(salary) from emp_ds group by deptno ").show();

    // Type-safe UDAF over a typed Dataset<Employee>.
    TypeSafeUDAF typeSafeUDAF = new TypeSafeUDAF();
    Dataset<Employee> emf = emp_ds.as(Encoders.bean(Employee.class));
    emf.printSchema();
    emf.show();

    TypedColumn<Employee, Double> averageSalary = typeSafeUDAF.toColumn().name("averageTypeSafe");
    Dataset<Double> result = emf.select(averageSalary);
    result.show();
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 51, Source: UDFExample.java
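
Example 5 registers CalcDaysUDF but does not show its body. Here is a minimal sketch of a UDF2 matching the registration above (two String arguments, Long result); the computation is an assumption, and the book's actual class may differ:

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import org.apache.spark.sql.api.java.UDF2;

// Hypothetical implementation: parse the hire date with the supplied
// pattern and return the number of days from then until today.
public class CalcDaysUDF implements UDF2<String, String, Long> {
    @Override
    public Long call(String date, String format) throws Exception {
        LocalDate hired = LocalDate.parse(date, DateTimeFormatter.ofPattern(format));
        return ChronoUnit.DAYS.between(hired, LocalDate.now());
    }
}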

Example 6: main

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
public static void main(String[] args) throws StreamingQueryException {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("structured Streaming Example")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

    // Read a socket stream of JSON records.
    Dataset<Row> inStream = sparkSession.readStream().format("socket").option("host", "10.204.136.223")
            .option("port", 9999).load();

    // Deserialize each JSON line into a FlightDetails bean.
    Dataset<FlightDetails> dsFlightDetails = inStream.as(Encoders.STRING()).map(x -> {
        ObjectMapper mapper = new ObjectMapper();
        return mapper.readValue(x, FlightDetails.class);
    }, Encoders.bean(FlightDetails.class));

    // Temp views work on streaming Datasets too; the SQL below runs continuously.
    dsFlightDetails.createOrReplaceTempView("flight_details");

    Dataset<Row> avdFlightDetails = sparkSession.sql("select flightId, avg(temperature) from flight_details group by flightId");

    StreamingQuery query = avdFlightDetails.writeStream()
            .outputMode("complete")
            .format("console")
            .start();

    query.awaitTermination();
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 29, Source: StructuredStreamingExample.java

Example 7: prepareRDDAndWriteToCassandra

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
private void prepareRDDAndWriteToCassandra(String localFilePath) throws QueryExecutionException {
    SparkConf conf = new SparkConf();
    conf.setAppName("file-loader-poc");
    conf.setMaster("local[*]");

    SparkContext sparkContext = new SparkContext(conf);

    JavaRDD<VideoViewEvent> videoEventRDD = prepareVideoRDD(localFilePath, sparkContext);

    SparkSession sparkSession = SparkSession.builder().getOrCreate();
    SQLContext sqlContext = new SQLContext(sparkSession);

    // Build a DataFrame from the bean RDD and register it for SQL access.
    Dataset<Row> videoEventDF = sqlContext.createDataFrame(videoEventRDD, VideoViewEvent.class);
    videoEventDF.createOrReplaceTempView("videoEventTempView");

    String videoViewCountQuery = "select videoId, viewDurationInSeconds, count(*)"
            + " as view_counts from videoEventTempView group by 1, 2";

    List<Row> collectAsList = sqlContext.sql(videoViewCountQuery).collectAsList();
    // printRows(collectAsList);

    // Push directly to Cassandra.
    String tableName = "video_view_count";
    Connection<CassandraDBContext> connection = new CassandraConnection("localhost", 9042);
    writeVideoViewCountResultToCassandra(collectAsList, connection, tableName);

    tableName = "user_view_count";
    String userViewCountQuery = "select userId, viewDurationInSeconds, count(*) as"
            + " view_counts from videoEventTempView group by 1, 2";

    collectAsList = sqlContext.sql(userViewCountQuery).collectAsList();

    writeUserViewCountResultToCassandra(collectAsList, tableName, connection);

    saveAsParquetFiles(sqlContext, videoViewCountQuery);
}
 
Developer: alokawi, Project: spark-cassandra-poc, Lines: 39, Source: SparkFileLoaderUtils.java
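
Example 7 routes registration and queries through the legacy SQLContext wrapper; on Spark 2.x the SparkSession handles both directly. A minimal sketch of the equivalent core steps, assuming videoEventRDD and VideoViewEvent from the example are in scope:

// SparkSession.createDataFrame and SparkSession.sql replace the SQLContext calls.
SparkSession sparkSession = SparkSession.builder().getOrCreate();

Dataset<Row> videoEventDF = sparkSession.createDataFrame(videoEventRDD, VideoViewEvent.class);
videoEventDF.createOrReplaceTempView("videoEventTempView");

// Ordinal GROUP BY ("group by 1, 2") works while spark.sql.groupByOrdinal
// is enabled, which is the default.
List<Row> rows = sparkSession
        .sql("select videoId, viewDurationInSeconds, count(*) as view_counts"
                + " from videoEventTempView group by 1, 2")
        .collectAsList();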


Note: The org.apache.spark.sql.Dataset.createOrReplaceTempView examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright in the source code remains with the original authors, and distribution and use are subject to each project's License. Do not reproduce without permission.