本文整理汇总了Java中org.apache.spark.sql.DataFrame.printSchema方法的典型用法代码示例。如果您正苦于以下问题:Java DataFrame.printSchema方法的具体用法?Java DataFrame.printSchema怎么用?Java DataFrame.printSchema使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.sql.DataFrame
的用法示例。
在下文中一共展示了DataFrame.printSchema方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.spark.sql.DataFrame; //导入方法依赖的package包/类
/**
 * Loads a CSV file into a Spark DataFrame using an explicit schema, writes it
 * to an Ampool table, then reads the table back and runs a few sample queries
 * (show, count, filter, projection, and a SQL ORDER BY via a temp table).
 *
 * @param args optional locator host ({@code args[0]}) and port ({@code args[1]});
 *             defaults are {@code localhost} and {@code 10334}
 */
public static void main(final String[] args) {
    final String tableName = "SparkExampleDFUsingCSV";
    /* Get the locator host/port from arguments, if specified. */
    final String locatorHost = args.length > 0 ? args[0] : "localhost";
    /* parseInt yields a primitive int directly, avoiding the Integer
     * boxing/unboxing round-trip of Integer.valueOf. */
    final int locatorPort = args.length > 1 ? Integer.parseInt(args[1]) : 10334;
    /* Create the SparkContext. */
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SparkExampleDFUsingCSV");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    try {
        SQLContext sqlContext = new SQLContext(jsc);
        /* Explicit schema for the CSV columns (all nullable). */
        StructType customSchema = new StructType(new StructField[] {
            new StructField("year", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("make", DataTypes.StringType, true, Metadata.empty()),
            new StructField("model", DataTypes.StringType, true, Metadata.empty()),
            new StructField("comment", DataTypes.StringType, true, Metadata.empty()),
            new StructField("blank", DataTypes.StringType, true, Metadata.empty())
        });
        DataFrame df = sqlContext.read()
            .format("com.databricks.spark.csv")
            .schema(customSchema)
            .option("header", "true")
            .load("cars.csv");
        /* Print schema and contents of the data-frame. */
        df.printSchema();
        df.show();
        Map<String, String> options = new HashMap<>(3);
        options.put("ampool.locator.host", locatorHost);
        options.put("ampool.locator.port", String.valueOf(locatorPort));
        /* Overwrite an existing table when the system property -Doverwrite=true is set. */
        SaveMode saveMode = Boolean.getBoolean("overwrite") ? SaveMode.Overwrite : SaveMode.ErrorIfExists;
        /* Save the dataFrame to Ampool as `tableName'. */
        df.write().format("io.ampool").options(options).mode(saveMode).save(tableName);
        System.out.println("########## DATA FROM AMPOOL ############");
        /* Load the data-frame back from the Ampool `tableName'. */
        DataFrame df1 = sqlContext.read().format("io.ampool").options(options).load(tableName);
        /* Show the contents of the loaded data-frame. */
        df1.show();
        /* Show the total number of rows in the data-frame. */
        System.out.println("# NumberOfRowsInDataFrame= " + df1.count());
        /* Data-frame with a filter predicate. */
        df1.filter("year > 1997").show();
        /* Data-frame with selected columns. */
        df1.select("year", "make", "model", "comment").show();
        df1.registerTempTable("temp_table");
        sqlContext.sql("select * from temp_table order by year").show();
    } finally {
        /* Stop the context so Spark resources are released even if a step above throws. */
        jsc.stop();
    }
}
示例2: main
import org.apache.spark.sql.DataFrame; //导入方法依赖的package包/类
/**
 * Builds a Spark DataFrame from in-memory sample Employee data, writes it to
 * an Ampool table, then reads the table back and runs a few sample queries
 * (show, count, filter, projection, and a SQL ORDER BY via a temp table).
 *
 * @param args optional locator host ({@code args[0]}) and port ({@code args[1]});
 *             defaults are {@code localhost} and {@code 10334}
 */
public static void main(final String[] args) {
    final String tableName = "SparkExampleDF";
    /* Get the locator host/port from arguments, if specified. */
    final String locatorHost = args.length > 0 ? args[0] : "localhost";
    /* parseInt yields a primitive int directly, avoiding the Integer
     * boxing/unboxing round-trip of Integer.valueOf. */
    final int locatorPort = args.length > 1 ? Integer.parseInt(args[1]) : 10334;
    /* Create the SparkContext. */
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SparkExampleDF");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    try {
        SQLContext sqlContext = new SQLContext(jsc);
        /* Create a data-frame from the existing in-memory sample data. */
        DataFrame df = sqlContext.createDataFrame(jsc.parallelize(SAMPLE_DATA), Employee.class);
        /* Print schema and contents of the data-frame. */
        df.printSchema();
        df.show();
        Map<String, String> options = new HashMap<>(3);
        options.put("ampool.locator.host", locatorHost);
        options.put("ampool.locator.port", String.valueOf(locatorPort));
        /* Overwrite an existing table when the system property -Doverwrite=true is set. */
        SaveMode saveMode = Boolean.getBoolean("overwrite") ? SaveMode.Overwrite : SaveMode.ErrorIfExists;
        /* Save the dataFrame to Ampool as `tableName'. */
        df.write().format("io.ampool").options(options).mode(saveMode).save(tableName);
        System.out.println("########## DATA FROM AMPOOL ############");
        /* Load the data-frame back from the Ampool `tableName'. */
        DataFrame df1 = sqlContext.read().format("io.ampool").options(options).load(tableName);
        /* Show the contents of the loaded data-frame. */
        df1.show();
        /* Show the total number of rows in the data-frame. */
        System.out.println("# NumberOfRowsInDataFrame= " + df1.count());
        /* Data-frame with a filter predicate. */
        df1.filter("id > 2").show();
        /* Data-frame with selected columns. */
        df1.select("name", "id", "department").show();
        df1.registerTempTable("temp_table");
        sqlContext.sql("select * from temp_table order by id").show();
    } finally {
        /* Stop the context so Spark resources are released even if a step above throws. */
        jsc.stop();
    }
}