This article collects typical usage examples of the Java class org.apache.spark.sql.SaveMode. If you are wondering what the SaveMode class is for, how to use it, or where to find real-world examples, the curated code samples below should help.
The SaveMode class belongs to the org.apache.spark.sql package. A total of 15 code examples of the SaveMode class are shown below, sorted by popularity by default.
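Before diving into the examples, here is a minimal sketch (not taken from any of the projects below; the output path is hypothetical) showing how the four SaveMode constants behave when writing a DataFrame:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SaveModeSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("SaveModeSketch").getOrCreate();
        Dataset<Row> df = spark.range(10).toDF("id"); // toy data

        String out = "/tmp/savemode-demo"; // hypothetical path
        df.write().mode(SaveMode.ErrorIfExists).parquet(out); // default mode: fail if the path already exists
        df.write().mode(SaveMode.Append).parquet(out);        // add new files next to the existing ones
        df.write().mode(SaveMode.Overwrite).parquet(out);     // replace whatever is at the path
        df.write().mode(SaveMode.Ignore).parquet(out);        // silently skip the write if data already exists

        spark.stop();
    }
}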
Example 1: main
import org.apache.spark.sql.SaveMode; // import the required package/class

public static void main(String[] args) throws IOException {
    Flags.setFromCommandLineArgs(THE_OPTIONS, args);

    // Initialize the Spark conf
    SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
    SQLContext sqlContext = new SQLContext(sc);

    // Initialize the parameters
    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

    // Read data from the Kafka stream
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
            StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        private static final long serialVersionUID = 5266880065425088203L;

        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
        List<ApacheAccessLog> list = new ArrayList<>();
        try {
            // Parse each line
            list.add(ApacheAccessLog.parseFromLogLine(line));
            return list;
        } catch (RuntimeException e) {
            return list;
        }
    }).cache();

    accessLogsDStream.foreachRDD(rdd -> {
        // RDD to DataFrame
        DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
        // Write to Parquet files
        df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append)
                .parquet(Flags.getInstance().getParquetFile());
        return null;
    });

    // Start the streaming context
    jssc.start(); // start the computation
    jssc.awaitTermination(); // wait for termination
}
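Because the write above uses partitionBy together with SaveMode.Append, each micro-batch adds Parquet files under Hive-style directories such as ipAddress=.../method=.../responseCode=.... A minimal sketch of reading that output back (shown here with the Spark 2.x SparkSession API; the path is hypothetical) and letting Spark prune partitions from the filter:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReadAccessLogsSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("ReadAccessLogsSketch").getOrCreate();

        // The partition columns are recovered from the directory names, so a filter on
        // them only scans the matching subdirectories.
        Dataset<Row> logs = spark.read().parquet("/data/apache-access-logs.parquet"); // hypothetical path
        logs.filter("responseCode = 500 AND method = 'POST'")
            .groupBy("ipAddress")
            .count()
            .show();

        spark.stop();
    }
}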
Example 2: buildProperties
import org.apache.spark.sql.SaveMode; // import the required package/class

public void buildProperties() {
    // return rows of format <predicate, is_complex>
    // is_complex can be 1 or 0
    // 1 for multivalued predicate, 0 for single predicate

    // select the properties that are complex
    Dataset<Row> multivaluedProperties = spark.sql(String.format(
            "SELECT DISTINCT(%1$s) AS %1$s FROM "
            + "(SELECT %2$s, %1$s, COUNT(*) AS rc FROM %3$s GROUP BY %2$s, %1$s HAVING rc > 1) AS grouped",
            column_name_predicate, column_name_subject, name_tripletable));

    // select all the properties
    Dataset<Row> allProperties = spark.sql(String.format("SELECT DISTINCT(%1$s) AS %1$s FROM %2$s",
            column_name_predicate, name_tripletable));

    // select the properties that are not complex
    Dataset<Row> singledValueProperties = allProperties.except(multivaluedProperties);

    // combine them
    Dataset<Row> combinedProperties = singledValueProperties
            .selectExpr(column_name_predicate, "0 AS is_complex")
            .union(multivaluedProperties.selectExpr(column_name_predicate, "1 AS is_complex"));

    // remove '<' and '>', convert the characters
    Dataset<Row> cleanedProperties = combinedProperties.withColumn("p",
            functions.regexp_replace(functions.translate(combinedProperties.col("p"), "<>", ""),
                    "[[^\\w]+]", "_"));

    // write the result
    cleanedProperties.write().mode(SaveMode.Overwrite).saveAsTable("properties");
    logger.info("Created properties table with name: " + tablename_properties);
}
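A quick way to sanity-check the table written by buildProperties, assuming the predicate column is named p (as the withColumn("p", ...) call above suggests) and that the session was created with Hive support:

import org.apache.spark.sql.SparkSession;

public class InspectPropertiesSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("InspectPropertiesSketch")
                .enableHiveSupport() // the table was written with saveAsTable
                .getOrCreate();

        // One row per predicate: is_complex = 1 for multi-valued predicates, 0 otherwise.
        spark.sql("SELECT p, is_complex FROM properties ORDER BY is_complex DESC").show(20, false);

        spark.stop();
    }
}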
Example 3: execute
import org.apache.spark.sql.SaveMode; // import the required package/class

@Override
public Object execute(SparkSession sparkSession, ActionStatement actionStatement, CredentialProvider credentialManager) {
    String filePath = actionStatement.getParamValues().get(0).getValue().toString();
    String saveModeStr = actionStatement.getParamValues().get(1).getValue().toString();
    String dfTableName = actionStatement.getParamValues().get(2).getValue().toString();

    SaveMode saveMode = SaveMode.valueOf(saveModeStr);

    String sql = String.format("select * from %s", dfTableName);
    logger.info(String.format("Running sql [%s] to get data and then save it", sql));
    Dataset<Row> df = sparkSession.sql(sql);

    logger.info(String.format("Saving to csv %s, saveMode: %s", filePath, saveMode));
    df.coalesce(1).write().mode(saveMode).option("header", "false").csv(filePath);
    logger.info(String.format("Saved to csv %s, saveMode: %s", filePath, saveMode));
    return null;
}
Example 4: execute
import org.apache.spark.sql.SaveMode; // import the required package/class

@Override
public Object execute(SparkSession sparkSession, ActionStatement actionStatement, CredentialProvider credentialManager) {
    String filePath = actionStatement.getParamValues().get(0).getValue().toString();
    String saveModeStr = actionStatement.getParamValues().get(1).getValue().toString();
    String dfTableName = actionStatement.getParamValues().get(2).getValue().toString();

    SaveMode saveMode = SaveMode.valueOf(saveModeStr);

    String sql = String.format("select * from %s", dfTableName);
    logger.info(String.format("Running sql [%s] to get data and then save it", sql));
    Dataset<Row> df = sparkSession.sql(sql);

    logger.info(String.format("Saving to json %s, saveMode: %s", filePath, saveMode));
    df.coalesce(1).write().mode(saveMode).json(filePath);
    logger.info(String.format("Saved to json %s, saveMode: %s", filePath, saveMode));
    return null;
}
Example 5: execute
import org.apache.spark.sql.SaveMode; // import the required package/class

@Override
public Object execute(SparkSession sparkSession, ActionStatement actionStatement, CredentialProvider credentialManager) {
    String filePath = actionStatement.getParamValues().get(0).getValue().toString();
    String saveModeStr = actionStatement.getParamValues().get(1).getValue().toString();
    String dfTableName = actionStatement.getParamValues().get(2).getValue().toString();

    SaveMode saveMode = SaveMode.valueOf(saveModeStr);

    String sql = String.format("select * from %s", dfTableName);
    logger.info(String.format("Running sql [%s] to get data and then save it", sql));
    Dataset<Row> df = sparkSession.sql(sql);

    logger.info(String.format("Saving to parquet %s, saveMode: %s", filePath, saveMode));
    df.coalesce(1).write().mode(saveMode).parquet(filePath);
    logger.info(String.format("Saved to parquet %s, saveMode: %s", filePath, saveMode));
    return null;
}
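Examples 3–5 differ only in the output format; all of them turn the second action parameter into a SaveMode via SaveMode.valueOf. A small sketch of that mapping and of its failure mode (the parameter strings here are made up):

import org.apache.spark.sql.SaveMode;

public class SaveModeParamSketch {
    public static void main(String[] args) {
        // valueOf expects the exact enum constant name: "Append", "Overwrite",
        // "ErrorIfExists" or "Ignore".
        SaveMode mode = SaveMode.valueOf("Overwrite");
        System.out.println("Parsed save mode: " + mode);

        try {
            SaveMode.valueOf("overwrite"); // wrong case
        } catch (IllegalArgumentException e) {
            // A misspelled parameter surfaces here, before any data is written.
            System.out.println("Unknown save mode string: " + e.getMessage());
        }
    }
}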
Example 6: writeParquetFile
import org.apache.spark.sql.SaveMode; // import the required package/class

@SuppressWarnings({ "unchecked", "rawtypes" })
public DataSet<ExecRow> writeParquetFile(int[] baseColumnMap, int[] partitionBy, String location, String compression,
                                         OperationContext context) {
    try {
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());

        List<Column> cols = new ArrayList();
        for (int i = 0; i < baseColumnMap.length; i++) {
            cols.add(new Column(ValueRow.getNamedColumn(baseColumnMap[i])));
        }
        List<String> partitionByCols = new ArrayList();
        for (int i = 0; i < partitionBy.length; i++) {
            partitionByCols.add(ValueRow.getNamedColumn(partitionBy[i]));
        }

        insertDF.write().option(SPARK_COMPRESSION_OPTION, compression)
                .partitionBy(partitionByCols.toArray(new String[partitionByCols.size()]))
                .mode(SaveMode.Append).parquet(location);

        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example 7: writeORCFile
import org.apache.spark.sql.SaveMode; // import the required package/class

@SuppressWarnings({ "unchecked", "rawtypes" })
public DataSet<ExecRow> writeORCFile(int[] baseColumnMap, int[] partitionBy, String location, String compression,
                                     OperationContext context) {
    try {
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());

        List<Column> cols = new ArrayList();
        for (int i = 0; i < baseColumnMap.length; i++) {
            cols.add(new Column(ValueRow.getNamedColumn(baseColumnMap[i])));
        }
        String[] partitionByCols = new String[partitionBy.length];
        for (int i = 0; i < partitionBy.length; i++) {
            partitionByCols[i] = ValueRow.getNamedColumn(partitionBy[i]);
        }

        insertDF.write().option(SPARK_COMPRESSION_OPTION, compression)
                .partitionBy(partitionByCols)
                .mode(SaveMode.Append).orc(location);

        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Example 8: writeTextFile
import org.apache.spark.sql.SaveMode; // import the required package/class

@SuppressWarnings({ "unchecked", "rawtypes" })
public DataSet<ExecRow> writeTextFile(SpliceOperation op, String location, String characterDelimiter, String columnDelimiter,
                                      int[] baseColumnMap,
                                      OperationContext context) {
    try {
        Dataset<Row> insertDF = SpliceSpark.getSession().createDataFrame(
                rdd.map(new SparkSpliceFunctionWrapper<>(new CountWriteFunction(context))).map(new LocatedRowToRowFunction()),
                context.getOperation().getExecRowDefinition().schema());

        List<Column> cols = new ArrayList();
        for (int i = 0; i < baseColumnMap.length; i++) {
            cols.add(new Column(ValueRow.getNamedColumn(baseColumnMap[i])));
        }

        // spark-2.2.0: commons-lang3-3.3.2 does not support the 'XXX' timezone pattern, specify 'ZZ' instead
        insertDF.write().option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ")
                .mode(SaveMode.Append).csv(location);

        ValueRow valueRow = new ValueRow(1);
        valueRow.setColumn(1, new SQLLongint(context.getRecordsWritten()));
        return new SparkDataSet<>(SpliceSpark.getContext().parallelize(Collections.singletonList(valueRow), 1));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
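The comment in Example 8 points out that with Spark 2.2 the CSV writer's timestampFormat must use the 'ZZ' timezone pattern rather than 'XXX', because the bundled commons-lang3 3.3.2 formatter does not understand 'XXX'. A hedged sketch of reading such an export back with the matching pattern (path and options are assumptions, not part of the original code):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ReadExportedCsvSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("ReadExportedCsvSketch").getOrCreate();

        // Use the same timestamp pattern on the read side so timestamp columns round-trip.
        Dataset<Row> rows = spark.read()
                .option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ")
                .option("inferSchema", "true")
                .csv("/tmp/splice-export"); // hypothetical path
        rows.printSchema();

        spark.stop();
    }
}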
Example 9: repartitionByNumberOfFile
import org.apache.spark.sql.SaveMode; // import the required package/class

public void repartitionByNumberOfFile(String inputPath, String outputPath, int numberOfParquet, boolean overwrite)
        throws IOException {
    // Read the input path
    Path input = new Path(inputPath);
    // Read the output path
    Path output = new Path(outputPath);

    // Check that the input folder actually contains parquet files
    if (!HDFSUtils.containsFiles(fileSystem, input, ".parquet")) {
        throw new FileNotFoundException("The folder " + inputPath + " does not contain any .parquet file");
    }
    // If the output folder exists, is not empty and overwrite was not requested
    if (overwrite == false && fileSystem.exists(output)
            && HDFSUtils.containsFiles(fileSystem, output, ".parquet")) {
        throw new IOException("The output folder " + outputPath + " already contains parquet files");
    }

    DataFrame dataInput = sqlContext.read().parquet(input.toString());
    dataInput.repartition(numberOfParquet).write().mode(overwrite ? SaveMode.Overwrite : SaveMode.ErrorIfExists)
            .parquet(output.toString());
}
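Example 9 combines a manual HDFS pre-check with a SaveMode chosen from the overwrite flag. The same idea, sketched with the plain Hadoop FileSystem API standing in for the project-specific HDFSUtils helper (all paths and the partition count are hypothetical):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class RepartitionSketch {
    public static void main(String[] args) throws IOException {
        SparkSession spark = SparkSession.builder().appName("RepartitionSketch").getOrCreate();

        String inputPath = "/data/in";   // hypothetical
        String outputPath = "/data/out"; // hypothetical
        boolean overwrite = false;

        // Friendly pre-check: fail early with a clear message instead of a generic Spark error.
        FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
        if (!overwrite && fs.exists(new Path(outputPath))) {
            throw new IOException("The output folder " + outputPath + " already exists");
        }

        // With overwrite=false, SaveMode.ErrorIfExists makes Spark itself refuse to clobber
        // anything that appears at the path between the check and the write.
        spark.read().parquet(inputPath)
                .repartition(8)
                .write()
                .mode(overwrite ? SaveMode.Overwrite : SaveMode.ErrorIfExists)
                .parquet(outputPath);

        spark.stop();
    }
}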
Example 10: load
import org.apache.spark.sql.SaveMode; // import the required package/class

@Override
public void load() {
    logger.info("Beginning the creation of VP tables.");
    if (this.properties_names == null) {
        logger.error("Properties not calculated yet. Extracting them");
        this.properties_names = extractProperties();
    }

    Vector<TableStats> tables_stats = new Vector<TableStats>();
    ThreadPoolExecutor loaders_pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(max_parallelism);

    for (int i = 0; i < this.properties_names.length; i++) {
        String property = this.properties_names[i];
        Dataset<Row> table_VP = spark.sql("SELECT s AS s, o AS o FROM tripletable WHERE p='" + property + "'");
        String table_name_VP = "vp_" + this.getValidHiveName(property);
        loaders_pool.submit(new Thread(() -> {
            // save the table
            table_VP.write().mode(SaveMode.Overwrite).saveAsTable(table_name_VP);
            // calculate stats
            if (computeStatistics)
                tables_stats.add(calculate_stats_table(table_VP, this.getValidHiveName(property)));
            logger.info("Created VP table for the property: " + property);
        }));
    }

    // save the stats in a file with the same name as the output database
    if (computeStatistics)
        save_stats(this.database_name, tables_stats);
    logger.info("Vertical Partitioning completed. Loaded " + String.valueOf(this.properties_names.length) + " tables.");
}
Example 11: buildComplexPropertyTable
import org.apache.spark.sql.SaveMode; // import the required package/class

/**
 * Create the final property table. allProperties contains the list of all
 * possible properties; isComplexProperty contains (in the same order used by
 * allProperties) the boolean value that indicates whether that property is
 * complex (also called multi-valued) or simple.
 */
public void buildComplexPropertyTable(String[] allProperties, Boolean[] isComplexProperty) {
    // create a new aggregation environment
    PropertiesAggregateFunction aggregator = new PropertiesAggregateFunction(allProperties, columns_separator);

    String predicateObjectColumn = "po";
    String groupColumn = "group";

    // get the compressed table
    Dataset<Row> compressedTriples = spark.sql(String.format("SELECT %s, CONCAT(%s, '%s', %s) AS po FROM %s",
            column_name_subject, column_name_predicate, columns_separator, column_name_object, name_tripletable));

    // group by the subject and get all the data
    Dataset<Row> grouped = compressedTriples.groupBy(column_name_subject)
            .agg(aggregator.apply(compressedTriples.col(predicateObjectColumn)).alias(groupColumn));

    // build the query to extract the property from the array
    String[] selectProperties = new String[allProperties.length + 1];
    selectProperties[0] = column_name_subject;
    for (int i = 0; i < allProperties.length; i++) {
        // if the property is a full URI, remove the < at the beginning and the > at the end
        String rawProperty = allProperties[i].startsWith("<") && allProperties[i].endsWith(">")
                ? allProperties[i].substring(1, allProperties[i].length() - 1)
                : allProperties[i];
        // if it is not a complex type, extract the single value
        String newProperty = isComplexProperty[i]
                ? " " + groupColumn + "[" + String.valueOf(i) + "] AS " + getValidHiveName(rawProperty)
                : " " + groupColumn + "[" + String.valueOf(i) + "][0] AS " + getValidHiveName(rawProperty);
        selectProperties[i + 1] = newProperty;
    }

    Dataset<Row> propertyTable = grouped.selectExpr(selectProperties);

    // write the final one, partitioned by subject
    propertyTable.write().mode(SaveMode.Overwrite).format(table_format).saveAsTable(output_tablename);
    logger.info("Created property table with name: " + output_tablename);
}
Example 12: writeAncestorsToTable
import org.apache.spark.sql.SaveMode; // import the required package/class

/**
 * Writes ancestor records to a table. This method ensures the columns and partitions are mapped
 * properly, and is a workaround similar to the problem described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param ancestors a dataset of ancestor records
 * @param tableName the table to write them to
 */
private static void writeAncestorsToTable(Dataset<Ancestor> ancestors, String tableName) {
    Dataset<Row> orderedColumnDataset = ancestors.select("descendantSystem",
            "descendantValue",
            "ancestorSystem",
            "ancestorValue",
            "uri",
            "version");

    orderedColumnDataset.write()
            .mode(SaveMode.ErrorIfExists)
            .insertInto(tableName);
}
Example 13: writeValuesToTable
import org.apache.spark.sql.SaveMode; // import the required package/class

/**
 * Writes value records to a table. This method ensures the columns and partitions are mapped
 * properly, and is a workaround similar to the problem described <a
 * href="http://stackoverflow.com/questions/35313077/pyspark-order-of-column-on-write-to-mysql-with-jdbc">here</a>.
 *
 * @param values a dataset of value records
 * @param tableName the table to write them to
 */
private static void writeValuesToTable(Dataset<Value> values, String tableName) {
    // Note the last two columns here must be the partitioned-by columns, in order and in lower case,
    // for Spark to properly match them to the partitions
    Dataset<Row> orderColumnDataset = values.select("system",
            "version",
            "value",
            "valueseturi",
            "valuesetversion");

    orderColumnDataset.write()
            .mode(SaveMode.ErrorIfExists)
            .insertInto(tableName);
}
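Examples 12 and 13 deliberately select the columns before calling insertInto because, unlike saveAsTable, insertInto resolves columns by position rather than by name (which is also why the partition columns are listed last). A self-contained sketch of that behaviour against a throwaway, non-partitioned table; all table and column names are made up:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class InsertIntoOrderSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("InsertIntoOrderSketch")
                .enableHiveSupport()
                .getOrCreate();

        spark.sql("CREATE TABLE IF NOT EXISTS demo_values (system STRING, version STRING, value STRING)");

        // Columns appear in a different order than in the table definition.
        Dataset<Row> values = spark.sql(
                "SELECT '8867-4' AS value, 'http://loinc.org' AS system, '2.56' AS version");

        // insertInto matches columns by position, not by name: without the explicit select,
        // the 'value' column would land in the table's 'system' column.
        values.select("system", "version", "value")
              .write()
              .insertInto("demo_values");

        spark.stop();
    }
}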
Example 14: savePropertiesIntoTable
import org.apache.spark.sql.SaveMode; // import the required package/class

/**
 * Create the "properties" table. See:
 * {@link ComplexPropertyTableLoader#tablename_properties}.
 */
public void savePropertiesIntoTable() {
    // return rows of format <predicate, is_complex>
    // is_complex can be 1 or 0
    // 1 for multivalued predicate, 0 for single predicate

    // select the properties that are complex
    DataFrame multivaluedProperties = this.hiveContext.sql(String.format(
            "SELECT DISTINCT(%1$s) AS %1$s FROM (SELECT %2$s, %1$s, COUNT(*) AS rc FROM %3$s GROUP BY %2$s, %1$s HAVING rc > 1) AS grouped",
            column_name_predicate, column_name_subject, tablename_triple_table));

    // select all the properties
    DataFrame allProperties = this.hiveContext.sql(String.format("SELECT DISTINCT(%1$s) AS %1$s FROM %2$s",
            column_name_predicate, tablename_triple_table));

    // select the properties that are not complex
    DataFrame singledValueProperties = allProperties.except(multivaluedProperties);

    // combine them
    DataFrame combinedProperties = singledValueProperties
            .selectExpr(column_name_predicate, "0 AS " + column_name_is_complex)
            .unionAll(multivaluedProperties.selectExpr(column_name_predicate, "1 AS " + column_name_is_complex));

    // remove '<' and '>', convert the characters
    DataFrame cleanedProperties = combinedProperties.withColumn("p",
            functions.regexp_replace(functions.translate(combinedProperties.col("p"), "<>", ""),
                    "[[^\\w]+]", "_"));

    // write the result
    cleanedProperties.write().mode(SaveMode.Overwrite).saveAsTable(tablename_properties);
}
Example 15: buildComplexPropertyTable
import org.apache.spark.sql.SaveMode; // import the required package/class

/**
 * Create the final property table. allProperties contains the list of all
 * possible properties; isComplexProperty contains (in the same order used by
 * allProperties) the boolean value that indicates whether that property is
 * complex (also called multi-valued) or simple.
 */
public void buildComplexPropertyTable(String[] allProperties, Boolean[] isComplexProperty) {
    // create a new aggregation environment
    PropertiesAggregateFunction aggregator = new PropertiesAggregateFunction(allProperties, columns_separator);

    String predicateObjectColumn = "po";
    String groupColumn = "group";

    // get the compressed table
    DataFrame compressedTriples = this.hiveContext.sql(String.format("SELECT %s, CONCAT(%s, '%s', %s) AS po FROM %s",
            column_name_subject, column_name_predicate, columns_separator, column_name_object, tablename_triple_table));

    // group by the subject and get all the data
    DataFrame grouped = compressedTriples.groupBy(column_name_subject)
            .agg(aggregator.apply(compressedTriples.col(predicateObjectColumn)).alias(groupColumn));

    // build the query to extract the property from the array
    String[] selectProperties = new String[allProperties.length + 1];
    selectProperties[0] = column_name_subject;
    for (int i = 0; i < allProperties.length; i++) {
        // if the property is a full URI, remove the < at the beginning and the > at the end
        String rawProperty = allProperties[i].startsWith("<") && allProperties[i].endsWith(">")
                ? allProperties[i].substring(1, allProperties[i].length() - 1)
                : allProperties[i];
        // if it is not a complex type, extract the single value
        String newProperty = isComplexProperty[i]
                ? " " + groupColumn + "[" + String.valueOf(i) + "] AS " + getValidColumnName(rawProperty)
                : " " + groupColumn + "[" + String.valueOf(i) + "][0] AS " + getValidColumnName(rawProperty);
        selectProperties[i + 1] = newProperty;
    }

    DataFrame propertyTable = grouped.selectExpr(selectProperties);

    // write the final one
    propertyTable.write().mode(SaveMode.Overwrite).format(table_format_parquet)
            .saveAsTable(tablename_complex_property_table);
}