当前位置: 首页>>代码示例>>Java>>正文


Java DataFrame.printSchema方法代码示例

本文整理汇总了Java中org.apache.spark.sql.DataFrame.printSchema方法的典型用法代码示例。如果您正苦于以下问题：Java DataFrame.printSchema方法的具体用法？Java DataFrame.printSchema怎么用？Java DataFrame.printSchema使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.spark.sql.DataFrame的用法示例。


在下文中一共展示了DataFrame.printSchema方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.spark.sql.DataFrame; //导入方法依赖的package包/类
/**
 * Main method..
 *
 * @param args the arguments
 */
/**
 * Loads a CSV file ({@code cars.csv}) into a Spark DataFrame using an explicit
 * schema, prints the schema and contents, saves the frame to an Ampool table,
 * then reads it back and runs a few sample queries (filter, select, SQL).
 *
 * @param args optional Ampool locator host ({@code args[0]}) and port ({@code args[1]});
 *             defaults to {@code localhost:10334} when omitted
 */
public static void main(final String[] args) {
  final String tableName = "SparkExampleDFUsingCSV";

  // Locator host/port from arguments, if specified.
  final String locatorHost = args.length > 0 ? args[0] : "localhost";
  // parseInt avoids the needless Integer boxing of Integer.valueOf.
  final int locatorPort = args.length > 1 ? Integer.parseInt(args[1]) : 10334;

  // Create the Spark context and SQL context (local mode, all cores).
  SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SparkExampleDFUsingCSV");
  JavaSparkContext jsc = new JavaSparkContext(conf);
  try {
    SQLContext sqlContext = new SQLContext(jsc);

    // Explicit schema for the CSV columns; all columns are nullable.
    StructType customSchema = new StructType(new StructField[] {
            new StructField("year", DataTypes.IntegerType, true, Metadata.empty()),
            new StructField("make", DataTypes.StringType, true, Metadata.empty()),
            new StructField("model", DataTypes.StringType, true, Metadata.empty()),
            new StructField("comment", DataTypes.StringType, true, Metadata.empty()),
            new StructField("blank", DataTypes.StringType, true, Metadata.empty())
    });

    DataFrame df = sqlContext.read()
            .format("com.databricks.spark.csv")
            .schema(customSchema)
            .option("header", "true")
            .load("cars.csv");

    // Print schema and contents of the data-frame.
    df.printSchema();
    df.show();

    Map<String, String> options = new HashMap<>();
    options.put("ampool.locator.host", locatorHost);
    options.put("ampool.locator.port", String.valueOf(locatorPort));

    // NOTE: Boolean.getBoolean reads the JVM *system property* "overwrite"
    // (i.e. -Doverwrite=true), not an environment variable or program argument.
    SaveMode saveMode =
        Boolean.getBoolean("overwrite") ? SaveMode.Overwrite : SaveMode.ErrorIfExists;

    // Save the dataFrame to Ampool as `tableName'.
    df.write().format("io.ampool").options(options).mode(saveMode).save(tableName);

    System.out.println("########## DATA FROM AMPOOL ############");

    // Load the data-frame back from the Ampool `tableName'.
    DataFrame df1 = sqlContext.read().format("io.ampool").options(options).load(tableName);

    // Show the contents of the loaded data-frame.
    df1.show();

    // Show the total number of rows in the data-frame.
    System.out.println("# NumberOfRowsInDataFrame= " + df1.count());

    // Data-frame with a filter applied.
    df1.filter("year > 1997").show();

    // Data-frame with selected columns only.
    df1.select("year", "make", "model", "comment").show();

    df1.registerTempTable("temp_table");

    sqlContext.sql("select * from temp_table order by year").show();
  } finally {
    // Release Spark resources even if a query above throws.
    jsc.stop();
  }
}
 
开发者ID:ampool,项目名称:monarch,代码行数:69,代码来源:SparkExampleDFUsingCSV.java

示例2: main

import org.apache.spark.sql.DataFrame; //导入方法依赖的package包/类
/**
 * Main method..
 *
 * @param args the arguments
 */
/**
 * Builds a Spark DataFrame from in-memory sample data ({@code SAMPLE_DATA} of
 * {@code Employee} beans), prints its schema and contents, saves it to an
 * Ampool table, then reads it back and runs a few sample queries.
 *
 * @param args optional Ampool locator host ({@code args[0]}) and port ({@code args[1]});
 *             defaults to {@code localhost:10334} when omitted
 */
public static void main(final String[] args) {
  final String tableName = "SparkExampleDF";

  // Locator host/port from arguments, if specified.
  final String locatorHost = args.length > 0 ? args[0] : "localhost";
  // parseInt avoids the needless Integer boxing of Integer.valueOf.
  final int locatorPort = args.length > 1 ? Integer.parseInt(args[1]) : 10334;

  // Create the Spark context and SQL context (local mode, all cores).
  SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("SparkExampleDF");
  JavaSparkContext jsc = new JavaSparkContext(conf);
  try {
    SQLContext sqlContext = new SQLContext(jsc);

    // Create a data-frame from existing in-memory data via bean reflection.
    DataFrame df = sqlContext.createDataFrame(jsc.parallelize(SAMPLE_DATA), Employee.class);

    // Print schema and contents of the data-frame.
    df.printSchema();
    df.show();

    Map<String, String> options = new HashMap<>();
    options.put("ampool.locator.host", locatorHost);
    options.put("ampool.locator.port", String.valueOf(locatorPort));

    // NOTE: Boolean.getBoolean reads the JVM *system property* "overwrite"
    // (i.e. -Doverwrite=true), not an environment variable or program argument.
    SaveMode saveMode =
        Boolean.getBoolean("overwrite") ? SaveMode.Overwrite : SaveMode.ErrorIfExists;

    // Save the dataFrame to Ampool as `tableName'.
    df.write().format("io.ampool").options(options).mode(saveMode).save(tableName);

    System.out.println("########## DATA FROM AMPOOL ############");

    // Load the data-frame back from the Ampool `tableName'.
    DataFrame df1 = sqlContext.read().format("io.ampool").options(options).load(tableName);

    // Show the contents of the loaded data-frame.
    df1.show();

    // Show the total number of rows in the data-frame.
    System.out.println("# NumberOfRowsInDataFrame= " + df1.count());

    // Data-frame with a filter applied.
    df1.filter("id > 2").show();

    // Data-frame with selected columns only.
    df1.select("name", "id", "department").show();

    df1.registerTempTable("temp_table");

    sqlContext.sql("select * from temp_table order by id").show();
  } finally {
    // Release Spark resources even if a query above throws.
    jsc.stop();
  }
}
 
开发者ID:ampool,项目名称:monarch,代码行数:57,代码来源:SparkExampleDF.java


注:本文中的org.apache.spark.sql.DataFrame.printSchema方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。