This article collects typical usage examples of the Dataset.cache method from the Java class org.apache.spark.sql.Dataset. If you are unsure what Dataset.cache does, how to call it, or where to find examples, the curated code samples below may help. You can also read further about the enclosing class, org.apache.spark.sql.Dataset.
Shown below are 6 code examples of Dataset.cache, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help surface better Java code samples.
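Before the examples, here is a minimal, self-contained sketch of the usual pattern: cache() is lazy, the cache is populated by the first action on the Dataset, and later actions reuse the in-memory data. The session settings, the input path people.json, and the age column are assumptions made only for this sketch.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DatasetCacheSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("Dataset.cache sketch")
        .master("local[*]")
        .getOrCreate();

    // Hypothetical input path and schema, used only for illustration.
    Dataset<Row> df = spark.read().json("people.json");

    // cache() only marks the Dataset for in-memory storage; nothing runs yet.
    df.cache();

    // The first action materializes the cache; the second reuses it.
    long total = df.count();
    long adults = df.where("age >= 18").count();
    System.out.println(total + " rows, " + adults + " adults");

    // Release the cached blocks once they are no longer needed.
    df.unpersist();
    spark.stop();
  }
}
Note that cache() is simply shorthand for persist() with the default storage level; persist(StorageLevel) can be used when a different level is needed.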
Example 1: testGetLatest
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void testGetLatest() {

  String database = "test_get_latest";
  spark.sql("CREATE DATABASE " + database);

  ValueSets.getEmpty(spark)
      .withValueSets(
          valueSet("urn:cerner:valueset:newvalueset", "1"),
          valueSet("urn:cerner:valueset:newvalueset", "2"),
          valueSet("urn:cerner:valueset:othervalueset", "1"))
      .writeToDatabase(database);

  // Request only the latest version of each value set.
  Dataset<Value> latest = ValueSets.getFromDatabase(spark, database)
      .getLatestValues(ImmutableSet.of("urn:cerner:valueset:newvalueset",
              "urn:cerner:valueset:othervalueset"),
          true);

  latest.cache();

  Assert.assertEquals(2, latest.count());

  // The superseded version "1" of newvalueset must not appear.
  Assert.assertEquals(0, latest.where(
      "valueSetUri == 'urn:cerner:valueset:newvalueset' AND valueSetVersion == '1'")
      .count());

  Assert.assertEquals(1, latest.where(
      "valueSetUri == 'urn:cerner:valueset:newvalueset' AND valueSetVersion == '2'")
      .count());

  Assert.assertEquals(1, latest.where(
      "valueSetUri == 'urn:cerner:valueset:othervalueset' AND valueSetVersion == '1'")
      .count());
}
Example 2: testGetLatest
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void testGetLatest() {

  String database = "test_get_latest";
  spark.sql("create database " + database);

  ConceptMaps.getEmpty(spark)
      .withConceptMaps(
          conceptMap("urn:cerner:map:newmap", "1"),
          conceptMap("urn:cerner:map:newmap", "2"),
          conceptMap("urn:cerner:map:othermap", "1"))
      .writeToDatabase(database);

  // Request only the latest version of each concept map.
  Dataset<Mapping> latest = ConceptMaps.getFromDatabase(spark, database)
      .getLatestMappings(
          ImmutableSet.of("urn:cerner:map:newmap",
              "urn:cerner:map:othermap"),
          true);

  latest.cache();

  Assert.assertEquals(2, latest.count());

  // The superseded version "1" of newmap must not appear.
  Assert.assertEquals(0,
      latest.where("conceptMapUri == 'urn:cerner:map:newmap' and conceptMapVersion == '1'")
          .count());

  Assert.assertEquals(1,
      latest.where("conceptMapUri == 'urn:cerner:map:newmap' and conceptMapVersion == '2'")
          .count());

  Assert.assertEquals(1,
      latest.where("conceptMapUri == 'urn:cerner:map:othermap' and conceptMapVersion == '1'")
          .count());
}
Example 3: start
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
private void start() {
  // @formatter:off
  SparkSession spark = SparkSession
      .builder()
      .appName("Stores Customer")
      .master("local")
      .getOrCreate();
  // @formatter:on

  Config config = ConfigManager.getConfig(K.INFORMIX);

  // @formatter:off
  Dataset<Row> df = spark
      .read()
      .format("jdbc")
      .option("url", config.getJdbcUrl())
      .option("dbtable", config.getTable())
      .option("user", config.getUser())
      .option("password", config.getPassword())
      .option("driver", config.getDriver())
      .load();
  // @formatter:on

  df.cache();

  df.printSchema();
  System.out.println("Number of rows in " + config
      .getTable() + ": " + df.count());
  df.show();
}
Example 4: start
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
private boolean start() {
  SparkSession spark = SparkSession.builder()
      .appName("EXIF to Dataset")
      .master("local[*]").getOrCreate();

  String importDirectory = "/Users/jgp/Pictures";

  Dataset<Row> df = spark.read()
      .format("exif")
      .option("recursive", "true")
      .option("limit", "100000")
      .option("extensions", "jpg,jpeg")
      .load(importDirectory);

  // We can start analytics
  df = df
      .filter(df.col("GeoX").isNotNull())
      .filter(df.col("GeoZ").notEqual("NaN"))
      .orderBy(df.col("GeoZ").desc());

  // collect() forces a full evaluation; the cache() below is only
  // materialized by the count() that follows it.
  df.collect();
  df.cache();

  System.out.println("I have imported " + df.count() + " photos.");
  df.printSchema();
  df.show(5);
  return true;
}
Example 5: start
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
private void start() {
  SparkSession spark;

  // @formatter:off
  spark = SparkSession
      .builder()
      .appName("Sales per week")
      .master("local")
      .getOrCreate();
  // @formatter:on

  // List of all tables we want to work with
  List<String> tables = new ArrayList<>();
  tables.add("orders");
  tables.add("items");

  // Specific Informix dialect
  JdbcDialect dialect = new InformixJdbcDialect();
  JdbcDialects.registerDialect(dialect);

  // Let's connect to the database
  Config config = ConfigManager.getConfig(K.INFORMIX);

  // Let's build our datalake
  Map<String, Dataset<Row>> datalake = new HashMap<>();
  for (String table : tables) {
    System.out.print("Loading table [" + table + "] ... ");
    // @formatter:off
    Dataset<Row> df = spark.read()
        .format("jdbc")
        .option("url", config.getJdbcUrl())
        .option("dbtable", table)
        .option("user", config.getUser())
        .option("password", config.getPassword())
        .option("driver", config.getDriver())
        .load();
    // @formatter:on
    datalake.put(table, df);
    System.out.println("done");
  }

  System.out.println("We have loaded " + datalake.size()
      + " table(s) in our data lake");

  // Let's look at the content
  Dataset<Row> ordersDf = datalake.get("orders");
  Dataset<Row> itemsDf = datalake.get("items");

  Dataset<Row> allDf = ordersDf
      .join(
          itemsDf,
          ordersDf.col("order_num").equalTo(itemsDf.col("order_num")),
          "full_outer")
      .drop(ordersDf.col("customer_num"))
      .drop(itemsDf.col("order_num"))
      .withColumn("order_week", lit(weekofyear(ordersDf.col("order_date"))));

  allDf = allDf
      .groupBy(allDf.col("order_week"))
      .sum("total_price")
      .orderBy(allDf.col("order_week"));

  allDf.cache();
  allDf.printSchema();
  allDf.show(50);
}
Example 6: runScriptReturnResults
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
public static Dataset<Row> runScriptReturnResults(String script) {
  String[] statements = splitScriptIntoStatements(script);

  // Run the last statement, then union in the result of every preceding statement.
  Dataset<Row> ds = hive.sql(statements[statements.length - 1]);
  for (int i = 0; i < statements.length - 1; i++) {
    ds = ds.union(hive.sql(statements[i]));
  }

  // Cache the combined result so callers can run multiple actions on it cheaply.
  return ds.cache();
}