This article collects typical usage examples of the Java method org.apache.spark.sql.Dataset.collect. If you are wondering what Dataset.collect does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the containing class, org.apache.spark.sql.Dataset.
Six code examples of the Dataset.collect method are shown below, sorted by popularity by default.
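Before the examples, here is a minimal, self-contained sketch of calling Dataset.collect from Java. The SparkSession setup, the in-memory sample data, and the class name CollectSketch are illustrative assumptions rather than code taken from the examples below; the cast on collect() mirrors the pattern used in Examples 2 through 6.

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class CollectSketch {
    public static void main(String[] args) {
        // Local session purely for illustration (assumption, not from the examples below).
        SparkSession spark = SparkSession.builder()
            .appName("Dataset.collect sketch")
            .master("local[*]")
            .getOrCreate();

        // Build a tiny Dataset<Row> from an in-memory list.
        Dataset<Row> df = spark
            .createDataset(Arrays.asList("a", "b", "c"), Encoders.STRING())
            .toDF("value");

        // collect() runs the query and brings every row back to the driver;
        // from Java the result is cast to Row[], as in the examples below.
        Row[] rows = (Row[]) df.collect();
        for (Row row : rows) {
            System.out.println(row.getString(0));
        }

        spark.stop();
    }
}

Because collect() materializes the entire result on the driver, it is best reserved for small result sets; the examples below only collect at most a few thousand rows.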
Example 1: start
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
private boolean start() {
    SparkSession spark = SparkSession.builder()
        .appName("EXIF to Dataset")
        .master("local[*]").getOrCreate();

    String importDirectory = "/Users/jgp/Pictures";

    Dataset<Row> df = spark.read()
        .format("exif")
        .option("recursive", "true")
        .option("limit", "100000")
        .option("extensions", "jpg,jpeg")
        .load(importDirectory);

    // We can start analytics
    df = df
        .filter(df.col("GeoX").isNotNull())
        .filter(df.col("GeoZ").notEqual("NaN"))
        .orderBy(df.col("GeoZ").desc());
    df.collect();
    df.cache();
    System.out.println("I have imported " + df.count() + " photos.");
    df.printSchema();
    df.show(5);
    return true;
}
Example 2: test_executeScript_read_jdbc
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void test_executeScript_read_jdbc() throws IOException {
    File file = File.createTempFile("h2dbfile", ".db");
    file.deleteOnExit();
    String connectionString = String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MySQL", file.getAbsolutePath());
    SqlUtils.executeJdbcUpdate(connectionString, "create table table1(intColumn bigint, strColumn varchar(15))");
    SqlUtils.executeJdbcUpdate(connectionString, "insert into table1(intColumn, strColumn) values (11, 'str1')");

    File passwordFile = File.createTempFile("password", ".json");
    passwordFile.deleteOnExit();
    FileUtils.write(passwordFile, "{'jdbc': {'pwd': 'password1'}}");

    QueryEngine engine = new QueryEngine();
    engine.setCredentialProvider(new JsonFileCredentialProvider());
    String query = String.format(
        "source1 = SQL jdbc set connectionString='%s'; set passwordFile='%s'; set passwordEntry='$.jdbc.pwd'; select intColumn, strColumn from table1;",
        connectionString,
        passwordFile.getAbsolutePath());
    engine.executeScript(query, sparkSession);

    Dataset<Row> df = sparkSession.sql("select * from source1");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    Assert.assertEquals(2, rows[0].size());
    Assert.assertEquals(new Long(11), rows[0].get(0));
    Assert.assertEquals("str1", rows[0].get(1));
}
Example 3: test_executeScript_read_jdbc_empty_passwordFile
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void test_executeScript_read_jdbc_empty_passwordFile() throws IOException {
    File file = File.createTempFile("h2dbfile", ".db");
    file.deleteOnExit();
    String connectionString = String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MySQL", file.getAbsolutePath());
    SqlUtils.executeJdbcUpdate(connectionString, "create table table1(intColumn bigint, strColumn varchar(15))");
    SqlUtils.executeJdbcUpdate(connectionString, "insert into table1(intColumn, strColumn) values (11, 'str1')");

    File passwordFile = File.createTempFile("password", ".json");
    passwordFile.deleteOnExit();
    FileUtils.write(passwordFile, "{'jdbc': {'pwd': 'password1'}}");

    QueryEngine engine = new QueryEngine();
    String query = String.format(
        "source1 = SQL jdbc set connectionString='%s'; set passwordFile=''; set passwordEntry=''; select intColumn, strColumn from table1;",
        connectionString);
    engine.executeScript(query, sparkSession);

    Dataset<Row> df = sparkSession.sql("select * from source1");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    Assert.assertEquals(2, rows[0].size());
    Assert.assertEquals(new Long(11), rows[0].get(0));
    Assert.assertEquals("str1", rows[0].get(1));
}
Example 4: test_executeBatchQuery_week_timepoints_by_10_minutes
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void test_executeBatchQuery_week_timepoints_by_10_minutes() {
    QueryEngine engine = new QueryEngine();
    String query = "source1 = datagen week_timepoints_by_10_minutes select timepoint from week_timepoints_by_10_minutes;result=select * from source1;";
    engine.executeScript(query, sparkSession);

    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1008, rows.length);
    Assert.assertEquals(new Integer(0), rows[0].get(0));
    Assert.assertEquals(new Integer(1), rows[1].get(0));
    Assert.assertEquals(new Integer(1007), rows[1007].get(0));
}
Example 5: test_executeBatchQuery_numbers_1k
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void test_executeBatchQuery_numbers_1k() {
    QueryEngine engine = new QueryEngine();
    String query = "source1 = datagen numbers_1k select number from numbers_1k;result = select * from source1;";
    engine.executeScript(query, sparkSession);

    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1001, rows.length);
    Assert.assertEquals(new Integer(0), rows[0].get(0));
    Assert.assertEquals(new Integer(1), rows[1].get(0));
    Assert.assertEquals(new Integer(1000), rows[1000].get(0));
}
Example 6: test_unix_timestamp
import org.apache.spark.sql.Dataset; // import the package/class this method depends on
@Test
public void test_unix_timestamp() {
    QueryEngine engine = new QueryEngine();
    String query = "result=select unix_timestamp(split('2016-12-03T10:38:11.760000+00:00', '\\\\.')[0], \"yyyy-MM-dd'T'HH:mm:ss\") as timestamp;";
    engine.executeScript(query, sparkSession);

    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    System.out.println("RESULT: " + rows[0].get(0));
}