This article collects typical usage examples of the Java class org.apache.spark.sql.Encoders: what the Encoders class is for, how it is used, and what real-world code that relies on it looks like.
The Encoders class belongs to the org.apache.spark.sql package. Fifteen code examples of the Encoders class are shown below, ordered roughly by popularity.
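As a quick orientation before the examples: Encoders is a factory of Encoder<T> instances for primitive wrapper types, Scala tuples, JavaBeans, and serialized objects. The sketch below lists the most common factory methods; it assumes the usual imports (org.apache.spark.sql.Encoder, org.apache.spark.sql.Encoders, scala.Tuple2), and the Person bean is a hypothetical placeholder, not a class from the examples.

// Common Encoders factory methods (Person is a hypothetical JavaBean used only for illustration).
Encoder<String> stringEncoder = Encoders.STRING();         // primitive wrapper types
Encoder<Integer> intEncoder = Encoders.INT();
Encoder<Tuple2<String, Long>> pairEncoder =
    Encoders.tuple(Encoders.STRING(), Encoders.LONG());    // Scala tuples
Encoder<Person> beanEncoder = Encoders.bean(Person.class); // JavaBean schema inference
Encoder<Person> kryoEncoder = Encoders.kryo(Person.class); // opaque binary encoding via Kryo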
Example 1: toJson
import org.apache.spark.sql.Encoders; // import the required package/class

/**
 * Converts a set of FHIR resources to JSON.
 *
 * @param dataset a dataset containing FHIR resources
 * @param resourceType the FHIR resource type
 * @return a dataset of JSON strings for the FHIR resources
 */
public static Dataset<String> toJson(Dataset<?> dataset, String resourceType) {

  Dataset<IBaseResource> resourceDataset =
      dataset.as(FhirEncoders.forStu3()
          .getOrCreate()
          .of(resourceType));

  return resourceDataset.map(new ToJson(), Encoders.STRING());
}
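A minimal, hypothetical invocation of this method is shown below. It assumes a Dataset of STU3 Patient resources named patients has already been loaded elsewhere (for example via the project's FhirEncoders); the variable name and resource type are illustrative assumptions.

// Hypothetical usage: convert an existing Dataset of STU3 Patient resources to JSON strings.
Dataset<String> patientJson = toJson(patients, "Patient"); // `patients` is assumed to exist
patientJson.show(5, false); // print a few JSON documents without truncation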
Example 2: main
import org.apache.spark.sql.Encoders; // import the required package/class

public static void main(String[] args) {
  SparkSession spark = SparkSession
      .builder()
      .appName("Dataset-JavaBean")
      .master("local[4]")
      .getOrCreate();

  //
  // The Java API requires you to explicitly instantiate an encoder for
  // any JavaBean you want to use for schema inference
  //
  Encoder<Number> numberEncoder = Encoders.bean(Number.class);

  //
  // Create a container of the JavaBean instances
  //
  List<Number> data = Arrays.asList(
      new Number(1, "one", "un"),
      new Number(2, "two", "deux"),
      new Number(3, "three", "trois"));

  //
  // Use the encoder and the container of JavaBean instances to create a
  // Dataset
  //
  Dataset<Number> ds = spark.createDataset(data, numberEncoder);

  System.out.println("*** here is the schema inferred from the bean");
  ds.printSchema();

  System.out.println("*** here is the data");
  ds.show();

  // Use the convenient bean-inferred column names to query
  System.out.println("*** filter by one column and fetch others");
  ds.where(col("i").gt(2)).select(col("english"), col("french")).show();

  spark.stop();
}
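The Number bean itself is not shown in the excerpt. Based on the constructor calls and the columns queried (i, english, french), it likely looks roughly like the sketch below; the exact field names, accessors, and access modifiers are assumptions.

// Hypothetical sketch of the Number JavaBean used above (fields inferred from the example).
public static class Number implements java.io.Serializable {
  private int i;
  private String english;
  private String french;

  public Number() {} // no-arg constructor, needed by bean encoders

  public Number(int i, String english, String french) {
    this.i = i;
    this.english = english;
    this.french = french;
  }

  public int getI() { return i; }
  public void setI(int i) { this.i = i; }
  public String getEnglish() { return english; }
  public void setEnglish(String english) { this.english = english; }
  public String getFrench() { return french; }
  public void setFrench(String french) { this.french = french; }
}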
Example 3: readText
import org.apache.spark.sql.Encoders; // import the required package/class

private Dataset<Row> readText(String path) throws Exception {
  Dataset<Row> lines = Contexts.getSparkSession().read().text(path);

  if (config.hasPath("translator")) {
    Dataset<Tuple2<String, String>> keyedLines = lines.map(
        new PrepareLineForTranslationFunction(), Encoders.tuple(Encoders.STRING(), Encoders.STRING()));
    TranslateFunction<String, String> translateFunction = new TranslateFunction<>(config.getConfig("translator"));
    return keyedLines.flatMap(translateFunction, RowEncoder.apply(translateFunction.getSchema()));
  }
  else {
    return lines;
  }
}
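Encoders.tuple is useful whenever a Java transformation produces Scala tuples. Below is a small, self-contained sketch that is independent of the framework classes used above; the helper method name and file path are hypothetical, and it assumes imports of org.apache.spark.api.java.function.MapFunction and scala.Tuple2.

// Minimal standalone sketch (hypothetical helper): key each text line by its length.
private Dataset<Tuple2<String, Integer>> keyLinesByLength(SparkSession spark, String path) {
  Dataset<Row> lines = spark.read().text(path);
  return lines.map(
      (MapFunction<Row, Tuple2<String, Integer>>) row -> {
        String line = row.getString(0);
        return new Tuple2<>(line, line.length());
      },
      Encoders.tuple(Encoders.STRING(), Encoders.INT()));
}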
Example 4: getMaxInt
import org.apache.spark.sql.Encoders; // import the required package/class

public static Dataset<Row> getMaxInt(Dataset<Row> ds, final String columnName) {
  Encoder<Integer> integerEncoder = Encoders.INT();
  log.debug("getMaxInt on " + columnName);

  Dataset<Integer> dso = ds.mapPartitions(new MapPartitionsFunction<Row, Integer>() {
    List<Integer> result = new ArrayList<>();

    @Override
    public Iterator<Integer> call(Iterator<Row> input) throws Exception {
      int curMax = -1;
      while (input.hasNext()) {
        Integer wInt = input.next().getAs(columnName);
        // Only add the value if it is larger than the current maximum.
        // Think of this as a reduce within the partition, before the final reduce.
        log.debug("wInt " + wInt.intValue());
        log.debug("curMax " + curMax);
        log.debug("Checking max int");
        if (wInt.intValue() > curMax) {
          result.add(wInt);
          curMax = wInt.intValue();
        }
      }
      return result.iterator();
    }
  }, integerEncoder);

  return dso.toDF(columnName).agg(max(columnName));
}
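For reference, the same maximum can be computed directly with Spark's built-in aggregation, without a custom partition function. A minimal sketch, assuming the column holds integers and org.apache.spark.sql.functions.max and col are statically imported:

// Equivalent result using the built-in max aggregation.
Dataset<Row> maxDf = ds.agg(max(col(columnName)));
maxDf.show();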
Example 5: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("Array to Dataset")
      // .master("local")
      .master("spark://10.0.100.81:7077")
      .getOrCreate();

  String[] l = new String[] { "a", "b", "c", "d" };
  List<String> data = Arrays.asList(l);

  Dataset<String> df = spark.createDataset(data, Encoders.STRING());
  df.show();
}
Example 6: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("Array to Dataframe")
      .master("local")
      .getOrCreate();

  String[] l = new String[] { "a", "b", "c", "d" };
  List<String> data = Arrays.asList(l);

  Dataset<String> ds = spark.createDataset(data, Encoders.STRING());
  Dataset<Row> df = ds.toDF();
  df.show();
}
Example 7: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder().master("local").getOrCreate();

  List<Integer> data = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
  Dataset<Integer> df = spark.createDataset(data, Encoders.INT());
  df.show();
  df.printSchema();

  Integer sumByReduce = df.reduce(new SumByReduce());
  System.out.println("Sum should be 55 and it is... " + sumByReduce);
}
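The SumByReduce class is not included in the excerpt. For Dataset.reduce it must implement org.apache.spark.api.java.function.ReduceFunction<Integer>, presumably along the lines of the sketch below (the class name comes from the call above; the body is an assumption).

// Hypothetical implementation of SumByReduce: pairwise addition for Dataset.reduce.
public class SumByReduce implements ReduceFunction<Integer> {
  private static final long serialVersionUID = 1L;

  @Override
  public Integer call(Integer left, Integer right) throws Exception {
    return left + right; // combine two partial sums into one
  }
}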
Example 8: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder().appName("CSV to Dataset<Book>").master("local").getOrCreate();

  String filename = "data/books.csv";
  Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true").option("header", "true")
      .load(filename);
  df.show();

  Dataset<Book> bookDf = df.map(new BookMapper(), Encoders.bean(Book.class));
  bookDf.show();
}
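Neither Book nor BookMapper appears in the excerpt. A plausible sketch of the mapper is shown below; it assumes Book is a serializable JavaBean with a no-arg constructor and getters/setters matching the fields it sets, and the column names title and author are purely illustrative guesses about books.csv.

// Hypothetical mapper: build a Book bean from each CSV row.
public class BookMapper implements MapFunction<Row, Book> {
  private static final long serialVersionUID = 1L;

  @Override
  public Book call(Row row) throws Exception {
    Book book = new Book();
    book.setTitle(row.getAs("title"));   // assumed column name
    book.setAuthor(row.getAs("author")); // assumed column name
    return book;
  }
}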
Example 9: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder().appName("CSV to Dataset<Book> as JSON").master("local").getOrCreate();

  String filename = "data/books.csv";
  Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true").option("header", "true")
      .load(filename);
  df.show();

  Dataset<String> bookDf = df.map(new BookMapper(), Encoders.STRING());
  bookDf.show(20, 132);

  Dataset<Row> bookAsJsonDf = spark.read().json(bookDf);
  bookAsJsonDf.show();
}
Example 10: start
import org.apache.spark.sql.Encoders; // import the required package/class

private void start() {
  SparkSession spark = SparkSession.builder().appName("Book URL Builder").master("local").getOrCreate();

  String filename = "data/books.csv";
  Dataset<Row> df = spark.read().format("csv").option("inferSchema", "true").option("header", "true")
      .load(filename);
  df.show();

  Dataset<String> ds = df.map(new BookUrlBuilder(), Encoders.STRING());
  ds.printSchema();
  ds.show(20, 80);
}
Example 11: test0FailOnIndexCreationDisabled
import org.apache.spark.sql.Encoders; // import the required package/class

@Test(expected = EsHadoopIllegalArgumentException.class)
public void test0FailOnIndexCreationDisabled() throws Exception {
  String target = wrapIndex("test-nonexisting/data");
  JavaStreamingQueryTestHarness<RecordBean> test = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

  RecordBean doc1 = new RecordBean();
  doc1.setId(1);
  doc1.setName("Spark");

  RecordBean doc2 = new RecordBean();
  doc2.setId(2);
  doc2.setName("Hadoop");

  RecordBean doc3 = new RecordBean();
  doc3.setId(3);
  doc3.setName("YARN");

  Dataset<RecordBean> dataset = test
      .withInput(doc1)
      .withInput(doc2)
      .withInput(doc3)
      .expectingToThrow(EsHadoopIllegalArgumentException.class)
      .stream();

  test.run(
      dataset.writeStream()
          .option("checkpointLocation", checkpoint(target))
          .option(ES_INDEX_AUTO_CREATE, "no")
          .format("es"),
      target
  );

  assertTrue(!RestUtils.exists(target));
}
Developer: elastic | Project: elasticsearch-hadoop | Source: AbstractJavaEsSparkStructuredStreamingTest.java
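RecordBean is defined elsewhere in the elasticsearch-hadoop test sources. For Encoders.bean(RecordBean.class) to work it must be a serializable JavaBean roughly like the sketch below; the field names are inferred from the setId/setName calls in Examples 11 through 15, and this is not the verbatim class.

// Sketch of the RecordBean JavaBean used by these streaming tests (inferred, not verbatim).
public static class RecordBean implements java.io.Serializable {
  private int id;
  private String name;

  public int getId() { return id; }
  public void setId(int id) { this.id = id; }
  public String getName() { return name; }
  public void setName(String name) { this.name = name; }
}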
Example 12: test1BasicWrite
import org.apache.spark.sql.Encoders; // import the required package/class

@Test
public void test1BasicWrite() throws Exception {
  String target = wrapIndex("test-write/data");
  JavaStreamingQueryTestHarness<RecordBean> test = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

  RecordBean doc1 = new RecordBean();
  doc1.setId(1);
  doc1.setName("Spark");

  RecordBean doc2 = new RecordBean();
  doc2.setId(2);
  doc2.setName("Hadoop");

  RecordBean doc3 = new RecordBean();
  doc3.setId(3);
  doc3.setName("YARN");

  Dataset<RecordBean> dataset = test
      .withInput(doc1)
      .withInput(doc2)
      .withInput(doc3)
      .stream();

  test.run(
      dataset.writeStream()
          .option("checkpointLocation", checkpoint(target))
          .format("es"),
      target
  );

  assertTrue(RestUtils.exists(target));
  assertThat(RestUtils.get(target + "/_search?"), containsString("Spark"));
  assertThat(RestUtils.get(target + "/_search?"), containsString("Hadoop"));
  assertThat(RestUtils.get(target + "/_search?"), containsString("YARN"));
}
Developer: elastic | Project: elasticsearch-hadoop | Source: AbstractJavaEsSparkStructuredStreamingTest.java
Example 13: test1WriteWithMappingId
import org.apache.spark.sql.Encoders; // import the required package/class

@Test
public void test1WriteWithMappingId() throws Exception {
  String target = wrapIndex("test-write-id/data");
  JavaStreamingQueryTestHarness<RecordBean> test = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

  RecordBean doc1 = new RecordBean();
  doc1.setId(1);
  doc1.setName("Spark");

  RecordBean doc2 = new RecordBean();
  doc2.setId(2);
  doc2.setName("Hadoop");

  RecordBean doc3 = new RecordBean();
  doc3.setId(3);
  doc3.setName("YARN");

  Dataset<RecordBean> dataset = test
      .withInput(doc1)
      .withInput(doc2)
      .withInput(doc3)
      .stream();

  test.run(
      dataset.writeStream()
          .option("checkpointLocation", checkpoint(target))
          .option("es.mapping.id", "id")
          .format("es"),
      target
  );

  assertEquals(3, JavaEsSpark.esRDD(new JavaSparkContext(spark.sparkContext()), target).count());
  assertTrue(RestUtils.exists(target + "/1"));
  assertTrue(RestUtils.exists(target + "/2"));
  assertTrue(RestUtils.exists(target + "/3"));
  assertThat(RestUtils.get(target + "/_search?"), containsString("Spark"));
}
Developer: elastic | Project: elasticsearch-hadoop | Source: AbstractJavaEsSparkStructuredStreamingTest.java
Example 14: test1WriteWithMappingExclude
import org.apache.spark.sql.Encoders; // import the required package/class

@Test
public void test1WriteWithMappingExclude() throws Exception {
  String target = wrapIndex("test-mapping-exclude/data");
  JavaStreamingQueryTestHarness<RecordBean> test = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

  RecordBean doc1 = new RecordBean();
  doc1.setId(1);
  doc1.setName("Spark");

  RecordBean doc2 = new RecordBean();
  doc2.setId(2);
  doc2.setName("Hadoop");

  RecordBean doc3 = new RecordBean();
  doc3.setId(3);
  doc3.setName("YARN");

  Dataset<RecordBean> dataset = test
      .withInput(doc1)
      .withInput(doc2)
      .withInput(doc3)
      .stream();

  test.run(
      dataset.writeStream()
          .option("checkpointLocation", checkpoint(target))
          .option(ES_MAPPING_EXCLUDE, "name")
          .format("es"),
      target
  );

  assertTrue(RestUtils.exists(target));
  assertThat(RestUtils.get(target + "/_search?"), not(containsString("Spark")));
  assertThat(RestUtils.get(target + "/_search?"), not(containsString("Hadoop")));
  assertThat(RestUtils.get(target + "/_search?"), not(containsString("YARN")));
}
Developer: elastic | Project: elasticsearch-hadoop | Source: AbstractJavaEsSparkStructuredStreamingTest.java
Example 15: test1MultiIndexWrite
import org.apache.spark.sql.Encoders; // import the required package/class

@Test
public void test1MultiIndexWrite() throws Exception {
  String target = wrapIndex("test-write-tech-{name}/data");
  JavaStreamingQueryTestHarness<RecordBean> test = new JavaStreamingQueryTestHarness<>(spark, Encoders.bean(RecordBean.class));

  RecordBean doc1 = new RecordBean();
  doc1.setId(1);
  doc1.setName("spark");

  RecordBean doc2 = new RecordBean();
  doc2.setId(2);
  doc2.setName("hadoop");

  Dataset<RecordBean> dataset = test
      .withInput(doc1)
      .withInput(doc2)
      .stream();

  test.run(
      dataset.writeStream()
          .option("checkpointLocation", checkpoint(target))
          .format("es"),
      target
  );

  assertTrue(RestUtils.exists(wrapIndex("test-write-tech-spark/data")));
  assertTrue(RestUtils.exists(wrapIndex("test-write-tech-hadoop/data")));
  assertThat(RestUtils.get(wrapIndex("test-write-tech-spark/data/_search?")), containsString("\"name\":\"spark\""));
  assertThat(RestUtils.get(wrapIndex("test-write-tech-hadoop/data/_search?")), containsString("\"name\":\"hadoop\""));
}
Developer: elastic | Project: elasticsearch-hadoop | Source: AbstractJavaEsSparkStructuredStreamingTest.java