

Java RDD.map Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.rdd.RDD.map. If you are wondering how exactly RDD.map is called from Java, or what real-world uses of RDD.map look like, the hand-picked code examples below should help. You can also explore further usage examples for the enclosing class, org.apache.spark.rdd.RDD.


Ten code examples of the RDD.map method are shown below, ordered by popularity by default.
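Before reading the examples, it helps to see the shape of the call itself. On the Scala side the method is declared as map[U: ClassTag](f: T => U): RDD[U], so a Java caller must supply both a scala.Function1 and a ClassTag for the result type. Below is a minimal, self-contained sketch of that pattern; the class and function names are illustrative and do not come from any of the projects quoted later.

import java.io.Serializable;
import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.AbstractFunction1;

public class RddMapSketch {

    // Functions shipped to executors must be Serializable.
    private static class ToLength extends AbstractFunction1<String, Integer> implements Serializable {
        @Override
        public Integer apply(final String s) {
            return s.length();
        }
    }

    public static void main(final String[] args) {
        final SparkConf conf = new SparkConf().setAppName("rdd-map-sketch").setMaster("local[*]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // Build the RDD through the Java API, then drop down to the Scala RDD.
            final RDD<String> lines = jsc.parallelize(Arrays.asList("a", "bb", "ccc")).rdd();
            final ClassTag<Integer> intTag = ClassTag$.MODULE$.apply(Integer.class);
            // RDD.map takes the Function1 plus an explicit ClassTag for the result type.
            final RDD<Integer> lengths = lines.map(new ToLength(), intTag);
            System.out.println(lengths.count()); // prints 3
        }
    }
}

Every example below follows this same two-argument pattern: a serializable Function1 plus a ClassTag (sometimes called a "manifest") describing the output element type.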

Example 1: doOperation

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
 
Developer: gchq, Project: Gaffer, Lines: 18, Source: GetRDDOfElementsHandler.java
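FirstElement is Gaffer's own class, but its job is just to keep the key of each (Element, NullWritable) pair produced by ElementInputFormat. As a rough illustration, a function of that shape could look like the hypothetical sketch below (not Gaffer's actual implementation); the same pattern reappears in Example 3.

import java.io.Serializable;

import org.apache.hadoop.io.NullWritable;

import scala.Tuple2;
import scala.runtime.AbstractFunction1;

// Element is assumed to be Gaffer's uk.gov.gchq.gaffer.data.element.Element.
class FirstElementSketch extends AbstractFunction1<Tuple2<Element, NullWritable>, Element>
        implements Serializable {
    @Override
    public Element apply(final Tuple2<Element, NullWritable> pair) {
        return pair._1(); // keep the Element, drop the NullWritable placeholder
    }
}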

Example 2: buildScan

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
/**
 * Creates a {@code DataFrame} of all {@link Element}s from the specified groups.
 *
 * @return An {@link RDD} of {@link Row}s containing {@link Element}s whose group is in {@code groups}.
 */
@Override
public RDD<Row> buildScan() {
    try {
        LOGGER.info("Building GetRDDOfAllElements with view set to groups {}", StringUtils.join(groups, ','));
        final GetRDDOfAllElements operation = new GetRDDOfAllElements();
        operation.setView(view);
        operation.setOptions(options);
        final RDD<Element> rdd = store.execute(operation, context);
        return rdd.map(new ConvertElementToRow(usedProperties, propertyNeedsConversion, converterByProperty),
                ClassTagConstants.ROW_CLASS_TAG);
    } catch (final OperationException e) {
        LOGGER.error("OperationException while executing operation", e);
        return null;
    }
}
 
Developer: gchq, Project: Gaffer, Lines: 21, Source: AccumuloStoreRelation.java
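ConvertElementToRow carries the per-property conversion logic into the map. A much-simplified, hypothetical stand-in is sketched below; it ignores the propertyNeedsConversion and converterByProperty arguments and simply flattens the group plus the used properties into a Row.

import java.io.Serializable;

import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

import scala.runtime.AbstractFunction1;

// Hypothetical, much-simplified stand-in for Gaffer's ConvertElementToRow.
class ElementToRowSketch extends AbstractFunction1<Element, Row> implements Serializable {
    private final String[] usedProperties;

    ElementToRowSketch(final String[] usedProperties) {
        this.usedProperties = usedProperties;
    }

    @Override
    public Row apply(final Element element) {
        final Object[] values = new Object[usedProperties.length + 1];
        values[0] = element.getGroup();
        for (int i = 0; i < usedProperties.length; i++) {
            values[i + 1] = element.getProperty(usedProperties[i]);
        }
        return RowFactory.create(values);
    }
}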

Example 3: doOperationUsingElementInputFormat

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
private RDD<Element> doOperationUsingElementInputFormat(final GetRDDOfAllElements operation,
                                                        final Context context,
                                                        final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    final String useBatchScannerRDD = operation.getOption(USE_BATCH_SCANNER_RDD);
    if (Boolean.parseBoolean(useBatchScannerRDD)) {
        InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    }
    final RDD<Tuple2<Element, NullWritable>> pairRDD = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext().newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ELEMENT_CLASS_TAG);
}
 
Developer: gchq, Project: Gaffer, Lines: 17, Source: GetRDDOfAllElementsHandler.java

Example 4: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
        PODistinct poDistinct) throws IOException {
    SparkUtil.assertPredecessorSize(predecessors, poDistinct, 1);
    RDD<Tuple> rdd = predecessors.get(0);

    ClassTag<Tuple2<Tuple, Object>> tuple2ClassManifest = SparkUtil
            .<Tuple, Object> getTuple2Manifest();

    RDD<Tuple2<Tuple, Object>> rddPairs = rdd.map(TO_KEY_VALUE_FUNCTION,
            tuple2ClassManifest);
    PairRDDFunctions<Tuple, Object> pairRDDFunctions
      = new PairRDDFunctions<Tuple, Object>(
            rddPairs, SparkUtil.getManifest(Tuple.class),
            SparkUtil.getManifest(Object.class), null);
    int parallelism = SparkUtil.getParallelism(predecessors, poDistinct);
    return pairRDDFunctions.reduceByKey(MERGE_VALUES_FUNCTION, parallelism)
            .map(TO_VALUE_FUNCTION, SparkUtil.getManifest(Tuple.class));
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 20, Source: DistinctConverter.java
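The three function constants referenced above are the whole trick: distinct is implemented as key-by-self, reduce-by-key, then un-key. Hypothetical versions of the trio are sketched below; Pig's actual implementations differ in detail.

import java.io.Serializable;

import org.apache.pig.data.Tuple;

import scala.Tuple2;
import scala.runtime.AbstractFunction1;
import scala.runtime.AbstractFunction2;

// Key each record by itself; the value is a throwaway marker.
class ToKeyValueSketch extends AbstractFunction1<Tuple, Tuple2<Tuple, Object>> implements Serializable {
    @Override
    public Tuple2<Tuple, Object> apply(final Tuple t) {
        return new Tuple2<Tuple, Object>(t, null);
    }
}

// reduceByKey only needs to keep one marker per key, which deduplicates the keys.
class MergeValuesSketch extends AbstractFunction2<Object, Object, Object> implements Serializable {
    @Override
    public Object apply(final Object a, final Object b) {
        return a;
    }
}

// Recover the deduplicated records from the keys.
class ToValueSketch extends AbstractFunction1<Tuple2<Tuple, Object>, Tuple> implements Serializable {
    @Override
    public Tuple apply(final Tuple2<Tuple, Object> pair) {
        return pair._1();
    }
}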

Example 5: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessorRdds, POLoad poLoad)
        throws IOException {
    // if (predecessors.size() != 0) {
    //     throw new RuntimeException("Should not have predecessors for Load. Got: " + predecessors);
    // }

    JobConf loadJobConf = SparkUtil.newJobConf(pigContext);
    configureLoader(physicalPlan, poLoad, loadJobConf);

    // don't know why but just doing this cast for now
    RDD<Tuple2<Text, Tuple>> hadoopRDD = sparkContext.newAPIHadoopFile(
            poLoad.getLFile().getFileName(), PigInputFormatSpark.class,
            Text.class, Tuple.class, loadJobConf);

    registerUdfFiles();
    // map to get just RDD<Tuple>
    return hadoopRDD.map(TO_TUPLE_FUNCTION,
            SparkUtil.getManifest(Tuple.class));
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 22, Source: LoadConverter.java
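Because newAPIHadoopFile yields (key, value) pairs, the final map simply discards the Text key. A hypothetical version of TO_TUPLE_FUNCTION (the real one lives in LoadConverter) might look like this:

import java.io.Serializable;

import org.apache.hadoop.io.Text;
import org.apache.pig.data.Tuple;

import scala.Tuple2;
import scala.runtime.AbstractFunction1;

// Keep the value, drop the Text key.
class ToTupleSketch extends AbstractFunction1<Tuple2<Text, Tuple>, Tuple> implements Serializable {
    @Override
    public Tuple apply(final Tuple2<Text, Tuple> pair) {
        return pair._2();
    }
}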

Example 6: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors, POSort sortOperator)
        throws IOException {
    SparkUtil.assertPredecessorSize(predecessors, sortOperator, 1);
    RDD<Tuple> rdd = predecessors.get(0);
    RDD<Tuple2<Tuple, Object>> rddPair = rdd.map(new ToKeyValueFunction(),
            SparkUtil.<Tuple, Object> getTuple2Manifest());

    JavaPairRDD<Tuple, Object> r = new JavaPairRDD<Tuple, Object>(rddPair,
            SparkUtil.getManifest(Tuple.class),
            SparkUtil.getManifest(Object.class));

    JavaPairRDD<Tuple, Object> sorted = r.sortByKey(
            sortOperator.new SortComparator(), true);
    JavaRDD<Tuple> mapped = sorted.mapPartitions(TO_VALUE_FUNCTION);

    return mapped.rdd();
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 19, Source: SortConverter.java

Example 7: doCql3SaveToCassandra

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
public static <W> void doCql3SaveToCassandra(RDD<W> rdd, ICassandraDeepJobConfig<W> writeConfig,
                                             Function1<W, Tuple2<Cells, Cells>> transformer) {
    if (!writeConfig.getIsWriteConfig()) {
        throw new IllegalArgumentException("Provided configuration object is not suitable for writing");
    }
    Tuple2<Map<String, ByteBuffer>, Map<String, ByteBuffer>> tuple = new Tuple2<>(null, null);

    RDD<Tuple2<Cells, Cells>> mappedRDD = rdd.map(transformer,
            ClassTag$.MODULE$.<Tuple2<Cells, Cells>>apply(tuple.getClass()));

    ((CassandraDeepJobConfig) writeConfig).createOutputTableIfNeeded(mappedRDD.first());

    final int pageSize = writeConfig.getBatchSize();
    int offset = 0;

    List<Tuple2<Cells, Cells>> elements = Arrays.asList((Tuple2<Cells, Cells>[]) mappedRDD.collect());
    List<Tuple2<Cells, Cells>> split;
    do {
        split = elements.subList(pageSize * (offset++), Math.min(pageSize * offset, elements.size()));

        Batch batch = QueryBuilder.batch();

        for (Tuple2<Cells, Cells> t : split) {
            Tuple2<String[], Object[]> bindVars = Utils.prepareTuple4CqlDriver(t);

            Insert insert = QueryBuilder
                    .insertInto(quote(writeConfig.getKeyspace()), quote(writeConfig.getTable()))
                    .values(bindVars._1(), bindVars._2());

            batch.add(insert);
        }
        writeConfig.getSession().execute(batch);

    } while (!split.isEmpty() && split.size() == pageSize);
}
 
Developer: Stratio, Project: deep-spark, Lines: 36, Source: CassandraUtils.java
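The subList-based paging loop is easy to get wrong at the boundaries, so here is a standalone illustration of how it walks the collected results (plain Java, nothing Cassandra-specific): with pageSize 2 over five elements it produces [1, 2], [3, 4], [5] and then stops because the last page is short.

import java.util.Arrays;
import java.util.List;

public class PagingSketch {
    public static void main(final String[] args) {
        final List<Integer> elements = Arrays.asList(1, 2, 3, 4, 5);
        final int pageSize = 2;
        int offset = 0;
        List<Integer> split;
        do {
            // Same arithmetic as doCql3SaveToCassandra: page [offset*pageSize, min((offset+1)*pageSize, size))
            split = elements.subList(pageSize * (offset++), Math.min(pageSize * offset, elements.size()));
            System.out.println(split); // [1, 2] then [3, 4] then [5]
        } while (!split.isEmpty() && split.size() == pageSize);
    }
}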

Example 8: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
        POLocalRearrange physicalOperator) throws IOException {
    SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
    RDD<Tuple> rdd = predecessors.get(0);
    // call local rearrange to get key and value
    return rdd.map(new LocalRearrangeFunction(physicalOperator),
            SparkUtil.getManifest(Tuple.class));

}
 
Developer: sigmoidanalytics, Project: spork, Lines: 11, Source: LocalRearrangeConverter.java

Example 9: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
        POPackage physicalOperator) throws IOException {
    SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
    RDD<Tuple> rdd = predecessors.get(0);
    // package will generate the group from the result of the local
    // rearrange
    return rdd.map(new PackageFunction(physicalOperator, this.confBytes),
            SparkUtil.getManifest(Tuple.class));
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 11, Source: PackageConverter.java

Example 10: convert

import org.apache.spark.rdd.RDD; // import the package/class this method depends on
@Override
public RDD<Tuple> convert(List<RDD<Tuple>> predecessors,
                          POSkewedJoin poSkewedJoin) throws IOException {

    SparkUtil.assertPredecessorSize(predecessors, poSkewedJoin, 2);
    LRs = new POLocalRearrange[2];
    this.poSkewedJoin = poSkewedJoin;

    createJoinPlans(poSkewedJoin.getJoinPlans());

    // extract the two RDDs
    RDD<Tuple> rdd1 = predecessors.get(0);
    RDD<Tuple> rdd2 = predecessors.get(1);

    // make (key, value) pairs, key has type Object, value has type Tuple
    RDD<Tuple2<Object, Tuple>> rdd1Pair = rdd1.map(new ExtractKeyFunction(
            this, 0), SparkUtil.<Object, Tuple>getTuple2Manifest());
    RDD<Tuple2<Object, Tuple>> rdd2Pair = rdd2.map(new ExtractKeyFunction(
            this, 1), SparkUtil.<Object, Tuple>getTuple2Manifest());

    // join fn is present in JavaPairRDD class ..
    JavaPairRDD<Object, Tuple> rdd1Pair_javaRDD = new JavaPairRDD<Object, Tuple>(
            rdd1Pair, SparkUtil.getManifest(Object.class),
            SparkUtil.getManifest(Tuple.class));
    JavaPairRDD<Object, Tuple> rdd2Pair_javaRDD = new JavaPairRDD<Object, Tuple>(
            rdd2Pair, SparkUtil.getManifest(Object.class),
            SparkUtil.getManifest(Tuple.class));

    // do the join
    JavaPairRDD<Object, Tuple2<Tuple, Tuple>> result_KeyValue = rdd1Pair_javaRDD
            .join(rdd2Pair_javaRDD);

    // map to get RDD<Tuple> from RDD<Object, Tuple2<Tuple, Tuple>> by
    // ignoring the key (of type Object) and appending the values (the
    // Tuples)
    JavaRDD<Tuple> result = result_KeyValue
            .mapPartitions(new ToValueFunction());

    // return type is RDD<Tuple>, so take it from JavaRDD<Tuple>
    return result.rdd();
}
 
Developer: sigmoidanalytics, Project: spork, Lines: 42, Source: SkewedJoinConverter.java
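ToValueFunction conceptually flattens each (key, (leftTuple, rightTuple)) join result into one output tuple by appending the fields of both sides. The helper below is a rough, hypothetical sketch of just that combination step (the real class also drives the partition iterator and handles errors):

import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

import scala.Tuple2;

final class CombineJoinedTuples {
    private static final TupleFactory TF = TupleFactory.getInstance();

    // Append the fields of the right tuple to the fields of the left tuple.
    static Tuple combine(final Tuple2<Tuple, Tuple> pair) throws Exception {
        final Tuple left = pair._1();
        final Tuple right = pair._2();
        final Tuple out = TF.newTuple(left.size() + right.size());
        for (int i = 0; i < left.size(); i++) {
            out.set(i, left.get(i));
        }
        for (int i = 0; i < right.size(); i++) {
            out.set(left.size() + i, right.get(i));
        }
        return out;
    }
}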


Note: the org.apache.spark.rdd.RDD.map examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and distribution and use are subject to each project's License. Do not reproduce without permission.