

Java Encoder Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.sql.Encoder. If you are wondering what the Encoder class does, how to use it, or what it looks like in real code, the curated examples below should help.


The Encoder class belongs to the org.apache.spark.sql package. Fifteen code examples of the class are shown below, sorted by popularity by default.
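
Before the examples, a quick primer (a minimal sketch, not drawn from any of the projects below): an Encoder tells Spark SQL how to convert a JVM object to and from its internal binary row format, and instances are normally obtained from the Encoders factory class. MyBean here is a hypothetical JavaBean.

import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;

Encoder<Integer> intEncoder = Encoders.INT();              // boxed primitives
Encoder<String> stringEncoder = Encoders.STRING();         // strings
Encoder<MyBean> beanEncoder = Encoders.bean(MyBean.class); // any JavaBean with getters/setters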

Example 1: extractEntry

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Extracts the given resource type from the RDD of bundles and returns
 * it as a Dataset of that type.
 *
 * @param spark the spark session
 * @param bundles an RDD of FHIR Bundles
 * @param resourceName the FHIR name of the resource type to extract
 *     (e.g., condition, patient, etc.).
 * @param encoders the Encoders instance defining how the resources are encoded.
 * @param <T> the type of the resource being extracted from the bundles.
 * @return a dataset of the given resource
 */
public static <T extends IBaseResource> Dataset<T> extractEntry(SparkSession spark,
    JavaRDD<Bundle> bundles,
    String resourceName,
    FhirEncoders encoders) {

  // 'context' is a HAPI FhirContext field defined elsewhere in Bundles.java
  RuntimeResourceDefinition def = context.getResourceDefinition(resourceName);

  JavaRDD<T> resourceRdd = bundles.flatMap(new ToResource<T>(def.getName()));

  Encoder<T> encoder = encoders.of((Class<T>) def.getImplementingClass());

  return spark.createDataset(resourceRdd.rdd(), encoder);
}
 
Developer: cerner, Project: bunsen, Lines: 26, Source: Bundles.java
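
A hypothetical call site for this method; spark, bundleRdd, and encoders are assumed to already exist, and Patient stands for the HAPI FHIR resource class (none of this appears in the original snippet):

// extract all Patient resources from an RDD of FHIR bundles
Dataset<Patient> patients = Bundles.extractEntry(spark, bundleRdd, "patient", encoders);
patients.show();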

Example 2: main

import org.apache.spark.sql.Encoder; // import the required package/class
public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("Dataset-JavaBean")
        .master("local[4]")
        .getOrCreate();

    //
    // The Java API requires you to explicitly instantiate an encoder for
    // any JavaBean you want to use for schema inference
    //
    Encoder<Number> numberEncoder = Encoders.bean(Number.class);
    //
    // Create a container of the JavaBean instances
    //
    List<Number> data = Arrays.asList(
            new Number(1, "one", "un"),
            new Number(2, "two", "deux"),
            new Number(3, "three", "trois"));
    //
    // Use the encoder and the container of JavaBean instances to create a
    // Dataset
    //
    Dataset<Number> ds = spark.createDataset(data, numberEncoder);

    System.out.println("*** here is the schema inferred from the bean");
    ds.printSchema();

    System.out.println("*** here is the data");
    ds.show();

    // Use the convenient bean-inferred column names to query
    System.out.println("*** filter by one column and fetch others");
    ds.where(col("i").gt(2)).select(col("english"), col("french")).show();

    spark.stop();
}
 
Developer: spirom, Project: learning-spark-with-java, Lines: 38, Source: JavaBean.java
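
The Number JavaBean used above is not included in the snippet. Judging from the constructor calls and the column names i, english, and french, it presumably looks something like the following reconstruction (a sketch, not the original source):

import java.io.Serializable;

public class Number implements Serializable {
    private int i;
    private String english;
    private String french;

    // Encoders.bean requires a public no-arg constructor
    public Number() { }

    public Number(int i, String english, String french) {
        this.i = i;
        this.english = english;
        this.french = french;
    }

    // the getter/setter pairs determine the inferred schema
    public int getI() { return i; }
    public void setI(int i) { this.i = i; }
    public String getEnglish() { return english; }
    public void setEnglish(String english) { this.english = english; }
    public String getFrench() { return french; }
    public void setFrench(String french) { this.french = french; }
}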

Example 3: getMaxInt

import org.apache.spark.sql.Encoder; // import the required package/class
public static Dataset<Row> getMaxInt(Dataset<Row> ds, final String columnName){
    Encoder<Integer> integerEncoder = Encoders.INT();

    log.debug("getMaxInt on " + columnName);
    Dataset<Integer> dso = ds.mapPartitions(new MapPartitionsFunction<Row, Integer>() {

        @Override
        public Iterator<Integer> call(Iterator<Row> input) throws Exception {
            // candidate maxima found in this partition
            List<Integer> result = new ArrayList<>();
            int curMax = -1;
            while (input.hasNext()) {
                Integer wInt = input.next().getAs(columnName);

                // only add if we found a larger value;
                // think of this as a pre-reduce inside the partition
                // before the final aggregation
                log.debug("wInt " + wInt.intValue());
                log.debug("curMax " + curMax);

                log.debug("Checking max int");
                if (wInt.intValue() > curMax) {
                    result.add(wInt);
                    curMax = wInt.intValue();
                }
            }
            return result.iterator();
        }
    }, integerEncoder);

    return dso.toDF(columnName).agg(max(columnName));
}
 
Developer: kineticadb, Project: kinetica-connector-spark, Lines: 37, Source: TypeIntProcessor.java
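
A hypothetical call site for this helper (df and the column name are assumptions):

// compute the maximum of an integer column; the result is a one-row Dataset
Dataset<Row> maxOfId = TypeIntProcessor.getMaxInt(df, "id");
maxOfId.show();

Note that df.agg(max(col("id"))) would produce the same answer directly; the mapPartitions pass just makes the per-partition pre-reduction explicit.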

Example 4: bufferEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
public Encoder<Average> bufferEncoder() {
	return Encoders.bean(Average.class);
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 4, Source: TypeSafeUDAF.java

Example 5: outputEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
public Encoder<Double> outputEncoder() {
	return Encoders.DOUBLE();
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 4, Source: TypeSafeUDAF.java
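
Examples 4 and 5 are two overrides taken from the same class, TypeSafeUDAF, and only make sense inside a typed Aggregator. A minimal skeleton of such an aggregator is sketched below; the Employee input bean, the Average buffer bean, and the averaging logic are assumptions, not the original Packt source:

import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.expressions.Aggregator;

// Hypothetical skeleton: Employee and Average are assumed JavaBeans
// (no-arg constructors plus getters/setters, as Encoders.bean requires).
public class TypeSafeUDAF extends Aggregator<Employee, Average, Double> {

    @Override
    public Average zero() {
        Average avg = new Average();
        avg.setSum(0L);
        avg.setCount(0L);
        return avg;
    }

    @Override
    public Average reduce(Average buffer, Employee emp) {
        buffer.setSum(buffer.getSum() + emp.getSalary());
        buffer.setCount(buffer.getCount() + 1);
        return buffer;
    }

    @Override
    public Average merge(Average b1, Average b2) {
        b1.setSum(b1.getSum() + b2.getSum());
        b1.setCount(b1.getCount() + b2.getCount());
        return b1;
    }

    @Override
    public Double finish(Average reduction) {
        return (double) reduction.getSum() / reduction.getCount();
    }

    @Override
    public Encoder<Average> bufferEncoder() {
        return Encoders.bean(Average.class);
    }

    @Override
    public Encoder<Double> outputEncoder() {
        return Encoders.DOUBLE();
    }
}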

Example 6: getMaxStringLen

import org.apache.spark.sql.Encoder; // import the required package/class
public static Dataset<Row> getMaxStringLen(Dataset<Row> ds, final String columnName){
    Encoder<Integer> integerEncoder = Encoders.INT();

    System.out.println("getMaxStringLen on " + columnName);
    Dataset<Integer> dso = ds.mapPartitions(new MapPartitionsFunction<Row, Integer>() {

        @Override
        public Iterator<Integer> call(Iterator<Row> input) throws Exception {
            // candidate maxima found in this partition
            List<Integer> result = new ArrayList<>();
            int curMax = -1;
            while (input.hasNext()) {
                Integer wInt;
                try {
                    wInt = input.next().getAs(columnName).toString().length();
                } catch (NullPointerException e) {
                    wInt = 0; // treat null values as zero-length strings
                }

                // only add if we found a larger value;
                // think of this as an inner-partition reduce before collect()
                log.debug("wInt " + wInt.intValue());
                log.debug("curMax " + curMax);

                log.debug("Checking max int");
                if (wInt.intValue() > curMax) {
                    result.add(wInt);
                    curMax = wInt.intValue();
                }
            }
            return result.iterator();
        }
    }, integerEncoder);

    return dso.toDF(columnName).agg(max(columnName));
}
 
Developer: kineticadb, Project: kinetica-connector-spark, Lines: 45, Source: TypeStringProcessor.java
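
For comparison, the same maximum-string-length value can be computed with Spark's built-in column functions, without a custom partition pass (a sketch; df and the column name are assumptions):

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.length;
import static org.apache.spark.sql.functions.max;

// max character length of a string column; max() ignores the nulls
// that length() produces for null values
df.agg(max(length(col("name")))).show();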

Example 7: main

import org.apache.spark.sql.Encoder; // import the required package/class
public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("Dataset-Basic")
        .master("local[4]")
        .getOrCreate();

    List<Integer> data = Arrays.asList(10, 11, 12, 13, 14, 15);
    Dataset<Integer> ds = spark.createDataset(data, Encoders.INT());

    System.out.println("*** only one column, and it always has the same name");
    ds.printSchema();

    ds.show();

    System.out.println("*** values > 12");

    // the harder way to filter
    Dataset<Integer> ds2 = ds.filter((Integer value) -> value > 12);

    ds2.show();

    List<Tuple3<Integer, String, String>> tuples =
        Arrays.asList(
            new Tuple3<>(1, "one", "un"),
            new Tuple3<>(2, "two", "deux"),
            new Tuple3<>(3, "three", "trois"));

    Encoder<Tuple3<Integer, String, String>> encoder =
        Encoders.tuple(Encoders.INT(), Encoders.STRING(), Encoders.STRING());

    Dataset<Tuple3<Integer, String, String>> tupleDS =
        spark.createDataset(tuples, encoder);

    System.out.println("*** Tuple Dataset types");
    tupleDS.printSchema();

    // the tuple columns have unfriendly names, but you can use them to query
    System.out.println("*** filter by one column and fetch another");
    tupleDS.where(col("_1").gt(2)).select(col("_2"), col("_3")).show();

    spark.stop();
}
 
Developer: spirom, Project: learning-spark-with-java, Lines: 44, Source: Basic.java
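
As the comment above notes, tuple columns get the positional names _1, _2, _3. If friendlier names are wanted, toDF can rename them, at the cost of dropping back to an untyped Dataset<Row> (a small follow-on sketch):

// rename the positional tuple columns; the result is a Dataset<Row>
Dataset<Row> named = tupleDS.toDF("number", "english", "french");
named.where(col("number").gt(2)).select(col("english"), col("french")).show();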

Example 8: bufferEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
@Override
public Encoder<Row> bufferEncoder() {
  return RowEncoder.apply(SCHEMA);
}
 
Developer: cloudera-labs, Project: envelope, Lines: 5, Source: DatasetRowRuleWrapper.java

Example 9: outputEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
@Override
public Encoder<Row> outputEncoder() {
  return RowEncoder.apply(SCHEMA);
}
 
Developer: cloudera-labs, Project: envelope, Lines: 5, Source: DatasetRowRuleWrapper.java
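
Examples 8 and 9 both reuse a SCHEMA constant that the snippets do not show. Such a constant is typically built with DataTypes; the field list below is a hypothetical reconstruction, not the actual schema in DatasetRowRuleWrapper:

import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.encoders.RowEncoder;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

// Hypothetical schema; the real field list lives in DatasetRowRuleWrapper.
private static final StructType SCHEMA = DataTypes.createStructType(new StructField[] {
    DataTypes.createStructField("name", DataTypes.StringType, false),
    DataTypes.createStructField("result", DataTypes.BooleanType, false)
});

// RowEncoder.apply turns a StructType into an Encoder<Row>.
Encoder<Row> rowEncoder = RowEncoder.apply(SCHEMA);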

Example 10: JavaStreamingQueryTestHarness

import org.apache.spark.sql.Encoder; // import the required package/class
public JavaStreamingQueryTestHarness(SparkSession spark, Encoder<S> encoder) {
    this.harness = new StreamingQueryTestHarness<>(spark, encoder);
}
 
Developer: elastic, Project: elasticsearch-hadoop, Lines: 4, Source: JavaStreamingQueryTestHarness.java

Example 11: getUriAndVersionEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Returns the encoder for UrlAndVersion tuples.
 *
 * @return the encoder for UrlAndVersion tuples.
 */
public static Encoder<UrlAndVersion> getUriAndVersionEncoder() {
  return URI_AND_VERSION_ENCODER;
}
 
Developer: cerner, Project: bunsen, Lines: 9, Source: Hierarchies.java

Example 12: getHierarchicalElementEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Returns the encoder for hierarchical elements.
 *
 * @return the encoder for hierarchical elements.
 */
public static Encoder<HierarchicalElement> getHierarchicalElementEncoder() {
  return HIERARCHICAL_ELEMENT_ENCODER;
}
 
Developer: cerner, Project: bunsen, Lines: 9, Source: Hierarchies.java

Example 13: getValueEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Returns the encoder for values.
 *
 * @return the encoder for values.
 */
public static Encoder<Value> getValueEncoder() {
  return VALUE_ENCODER;
}
 
Developer: cerner, Project: bunsen, Lines: 9, Source: ValueSets.java

Example 14: getValueSetEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Returns the encoder for value sets.
 *
 * @return the encoder for value sets.
 */
public static Encoder<ValueSet> getValueSetEncoder() {
  return VALUE_SET_ENCODER;
}
 
Developer: cerner, Project: bunsen, Lines: 9, Source: ValueSets.java

Example 15: getUrlAndVersionEncoder

import org.apache.spark.sql.Encoder; // import the required package/class
/**
 * Returns the encoder for UrlAndVersion tuples.
 *
 * @return the encoder for UrlAndVersion tuples.
 */
public static Encoder<UrlAndVersion> getUrlAndVersionEncoder() {
  return URL_AND_VERSION_ENCODER;
}
 
Developer: cerner, Project: bunsen, Lines: 9, Source: ValueSets.java
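
Examples 11 through 15 all return pre-built encoder constants that the snippets never define. In Bunsen these are presumably plain bean encoders; a hedged reconstruction of what the constants might look like:

import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;

// Hypothetical definitions; the originals live in Hierarchies.java and ValueSets.java.
private static final Encoder<UrlAndVersion> URL_AND_VERSION_ENCODER =
    Encoders.bean(UrlAndVersion.class);

private static final Encoder<HierarchicalElement> HIERARCHICAL_ELEMENT_ENCODER =
    Encoders.bean(HierarchicalElement.class);

private static final Encoder<Value> VALUE_ENCODER =
    Encoders.bean(Value.class);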


Note: The org.apache.spark.sql.Encoder class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by many developers, and copyright remains with the original authors; consult each project's license before using or redistributing the code, and do not republish without permission.