

Java Dataset.collect Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.sql.Dataset.collect. If you are wondering what Dataset.collect does, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also browse further usage examples for the enclosing class, org.apache.spark.sql.Dataset.


Six code examples of the Dataset.collect method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
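Before the examples, here is a minimal, self-contained sketch of calling Dataset.collect on a small in-memory Dataset. The class name, column name, and sample data are illustrative only:

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class DatasetCollectSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("Dataset.collect sketch")
                .master("local[*]")
                .getOrCreate();

        // Build a tiny Dataset<Row> from an in-memory list (illustrative data).
        List<String> values = Arrays.asList("a", "b", "c");
        Dataset<Row> df = spark.createDataset(values, Encoders.STRING()).toDF("letter");

        // collect() brings every row of the Dataset back to the driver as an array.
        // Calling it from Java yields an Object, hence the cast to Row[].
        Row[] rows = (Row[]) df.collect();
        System.out.println("Collected " + rows.length + " rows; first value: " + rows[0].getString(0));

        spark.stop();
    }
}

Note the cast to Row[]: when called from Java on a Dataset<Row>, collect() is exposed with an Object return type, which is why the examples below cast the result before indexing into it.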

Example 1: start

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
private boolean start() {
    SparkSession spark = SparkSession.builder()
            .appName("EXIF to Dataset")
            .master("local[*]").getOrCreate();
    
    String importDirectory = "/Users/jgp/Pictures";
    
    Dataset<Row> df = spark.read()
            .format("exif")
            .option("recursive", "true")
            .option("limit", "100000")
            .option("extensions", "jpg,jpeg")
            .load(importDirectory);
    
    // We can start analytics
    df = df
            .filter(df.col("GeoX").isNotNull())
            .filter(df.col("GeoZ").notEqual("NaN"))
            .orderBy(df.col("GeoZ").desc());
    // collect() pulls every filtered row back to the driver; the result is
    // discarded here, so the call simply forces the data source to be read.
    df.collect();
    df.cache();
    System.out.println("I have imported " + df.count() + " photos.");
    df.printSchema();
    df.show(5);
    
    return true;
}
 
Developer ID: jgperrin, Project: net.jgp.labs.spark.datasources, Lines of code: 28, Source file: PhotoMetadataIngestionApp.java

Example 2: test_executeScript_read_jdbc

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
@Test
public void test_executeScript_read_jdbc() throws IOException {
    File file = File.createTempFile("h2dbfile", ".db");
    file.deleteOnExit();

    String connectionString = String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MySQL", file.getAbsolutePath());

    SqlUtils.executeJdbcUpdate(connectionString, "create table table1(intColumn bigint, strColumn varchar(15))");
    SqlUtils.executeJdbcUpdate(connectionString, "insert into table1(intColumn, strColumn) values (11, 'str1')");

    File passwordFile = File.createTempFile("password", ".json");
    passwordFile.deleteOnExit();

    FileUtils.write(passwordFile, "{'jdbc': {'pwd': 'password1'}}");

    QueryEngine engine = new QueryEngine();
    engine.setCredentialProvider(new JsonFileCredentialProvider());
    String query = String.format(
            "source1 = SQL jdbc set connectionString='%s'; set passwordFile='%s'; set passwordEntry='$.jdbc.pwd'; select intColumn, strColumn from table1;",
            connectionString,
            passwordFile.getAbsolutePath());
    engine.executeScript(query, sparkSession);
    Dataset<Row> df = sparkSession.sql("select * from source1");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    Assert.assertEquals(2, rows[0].size());
    Assert.assertEquals(new Long(11), rows[0].get(0));
    Assert.assertEquals("str1", rows[0].get(1));
}
 
Developer ID: uber, Project: uberscriptquery, Lines of code: 30, Source file: QueryEngineTest.java

Example 3: test_executeScript_read_jdbc_empty_passwordFile

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
@Test
public void test_executeScript_read_jdbc_empty_passwordFile() throws IOException {
    File file = File.createTempFile("h2dbfile", ".db");
    file.deleteOnExit();

    String connectionString = String.format("jdbc:h2:%s;DB_CLOSE_DELAY=-1;MODE=MySQL", file.getAbsolutePath());

    SqlUtils.executeJdbcUpdate(connectionString, "create table table1(intColumn bigint, strColumn varchar(15))");
    SqlUtils.executeJdbcUpdate(connectionString, "insert into table1(intColumn, strColumn) values (11, 'str1')");

    File passwordFile = File.createTempFile("password", ".json");
    passwordFile.deleteOnExit();

    FileUtils.write(passwordFile, "{'jdbc': {'pwd': 'password1'}}");

    QueryEngine engine = new QueryEngine();
    String query = String.format(
            "source1 = SQL jdbc set connectionString='%s'; set passwordFile=''; set passwordEntry=''; select intColumn, strColumn from table1;",
            connectionString);
    engine.executeScript(query, sparkSession);
    Dataset<Row> df = sparkSession.sql("select * from source1");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    Assert.assertEquals(2, rows[0].size());
    Assert.assertEquals(new Long(11), rows[0].get(0));
    Assert.assertEquals("str1", rows[0].get(1));
}
 
Developer ID: uber, Project: uberscriptquery, Lines of code: 28, Source file: QueryEngineTest.java

Example 4: test_executeBatchQuery_week_timepoints_by_10_minutes

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
@Test
public void test_executeBatchQuery_week_timepoints_by_10_minutes() {
    QueryEngine engine = new QueryEngine();
    String query = "source1 = datagen week_timepoints_by_10_minutes select timepoint from week_timepoints_by_10_minutes;result=select * from source1;";
    engine.executeScript(query, sparkSession);
    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1008, rows.length);
    Assert.assertEquals(new Integer(0), rows[0].get(0));
    Assert.assertEquals(new Integer(1), rows[1].get(0));
    Assert.assertEquals(new Integer(1007), rows[1007].get(0));
}
 
Developer ID: uber, Project: uberscriptquery, Lines of code: 13, Source file: QueryEngineTest.java

Example 5: test_executeBatchQuery_numbers_1k

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
@Test
public void test_executeBatchQuery_numbers_1k() {
    QueryEngine engine = new QueryEngine();
    String query = "source1 = datagen numbers_1k select number from numbers_1k;result = select * from source1;";
    engine.executeScript(query, sparkSession);
    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1001, rows.length);
    Assert.assertEquals(new Integer(0), rows[0].get(0));
    Assert.assertEquals(new Integer(1), rows[1].get(0));
    Assert.assertEquals(new Integer(1000), rows[1000].get(0));
}
 
Developer ID: uber, Project: uberscriptquery, Lines of code: 13, Source file: QueryEngineTest.java

Example 6: test_unix_timestamp

import org.apache.spark.sql.Dataset; // import the package/class the method depends on
@Test
public void test_unix_timestamp() {
    QueryEngine engine = new QueryEngine();
    String query = "result=select unix_timestamp(split('2016-12-03T10:38:11.760000+00:00', '\\\\.')[0], \"yyyy-MM-dd'T'HH:mm:ss\") as timestamp;";
    engine.executeScript(query, sparkSession);
    Dataset<Row> df = sparkSession.sql("select * from result");
    Row[] rows = (Row[]) df.collect();
    Assert.assertEquals(1, rows.length);
    System.out.println("RESULT: " + rows[0].get(0));
}
 
Developer ID: uber, Project: uberscriptquery, Lines of code: 11, Source file: QueryEngineTest.java


Note: The org.apache.spark.sql.Dataset.collect examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult the corresponding projects' licenses before distributing or using the code; do not reproduce without permission.