本文整理汇总了Java中org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopFile方法的典型用法代码示例。如果您正苦于以下问题：Java JavaPairRDD.saveAsNewAPIHadoopFile方法的具体用法？Java JavaPairRDD.saveAsNewAPIHadoopFile怎么用？Java JavaPairRDD.saveAsNewAPIHadoopFile使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaPairRDD的用法示例。
在下文中一共展示了JavaPairRDD.saveAsNewAPIHadoopFile方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Reads a FASTQ data set, repartitions it into the requested number of
 * partitions, and writes the result back out in FASTQ format.
 *
 * <p>Expected arguments: {@code <input path> <output path> <number of partitions>}.
 * If fewer than three arguments are supplied, the usage line is printed to
 * standard error and the JVM exits with status 1.
 *
 * @param args command-line arguments; all three positions are read
 * @throws IOException declared by the signature for callers
 * @throws NumberFormatException if the partition count is not a valid integer
 */
public static void main(String[] args) throws IOException {
    // args[0], args[1] and args[2] are all read below, so all three are required.
    if (args.length < 3) {
        System.err.println("Usage: RepartitionFastq <input path> <output path> <number of partitions>");
        System.exit(1);
    }
    String inputPath = args[0];
    String outputPath = args[1];
    // Parse before starting Spark so a malformed count fails fast.
    int numPartitions = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setAppName("RepartitionFastq");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        JavaPairRDD<Text, SequencedFragment> fastqRDD = sc.newAPIHadoopFile(
                inputPath, FastqInputFormat.class, Text.class, SequencedFragment.class, sc.hadoopConfiguration());
        JavaPairRDD<Text, SequencedFragment> repartitioned = fastqRDD.repartition(numPartitions);
        repartitioned.saveAsNewAPIHadoopFile(
                outputPath, Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());
    } finally {
        // Stop the context even when the job throws.
        sc.stop();
    }
}
示例2: main
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Reads SAM/BAM records from the input path, converts them to FASTQ
 * key/value pairs via {@code mapSAMRecordsToFastq}, and writes them to the
 * output path in FASTQ format.
 *
 * <p>Expected arguments: {@code <input path> <output path>}. If fewer than two
 * arguments are supplied, a usage line is printed to standard error and the
 * JVM exits with status 1.
 *
 * @param args command-line arguments; positions 0 and 1 are read
 * @throws IOException declared by the signature for callers
 */
public static void main(String[] args) throws IOException {
    // Both paths are required; fail with a usage message rather than an
    // ArrayIndexOutOfBoundsException after Spark has already started.
    if (args.length < 2) {
        System.err.println("Usage: SamToFastq <input path> <output path>");
        System.exit(1);
    }
    String in = args[0];
    String out = args[1];

    SparkConf conf = new SparkConf().setAppName("SamToFastq");
    // NOTE(review): sc is not declared in this method; presumably a static
    // field of the enclosing class -- confirm against the full file.
    sc = new JavaSparkContext(conf);
    try {
        JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(
                in, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
        // Keep only the value of each pair, unwrapped from its writable.
        JavaRDD<SAMRecord> samRDD = bamPairRDD.map(pair -> pair._2().get());
        JavaPairRDD<Text, SequencedFragment> fastqrdd = mapSAMRecordsToFastq(samRDD);
        fastqrdd.saveAsNewAPIHadoopFile(
                out, Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());
    } finally {
        // Stop the context even when the job throws.
        sc.stop();
    }
}
示例3: main
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Reads a FASTQ data set, coalesces it down to the requested number of
 * partitions, and writes the result back out in FASTQ format.
 *
 * <p>Expected arguments: {@code <input path> <output path> <number of partitions>}.
 * If fewer than three arguments are supplied, the usage line is printed to
 * standard error and the JVM exits with status 1.
 *
 * @param args command-line arguments; all three positions are read
 * @throws IOException declared by the signature for callers
 * @throws NumberFormatException if the partition count is not a valid integer
 */
public static void main(String[] args) throws IOException {
    // args[0], args[1] and args[2] are all read below, so all three are required.
    if (args.length < 3) {
        System.err.println("Usage: MergeFastq <input path> <output path> <number of partitions>");
        System.exit(1);
    }
    String inputPath = args[0];
    String outputPath = args[1];
    // Parse before starting Spark so a malformed count fails fast.
    int numPartitions = Integer.parseInt(args[2]);

    SparkConf conf = new SparkConf().setAppName("MergeFastq");
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
        JavaPairRDD<Text, SequencedFragment> fastqRDD = sc.newAPIHadoopFile(
                inputPath, FastqInputFormat.class, Text.class, SequencedFragment.class, sc.hadoopConfiguration());
        JavaPairRDD<Text, SequencedFragment> coalesced = fastqRDD.coalesce(numPartitions);
        coalesced.saveAsNewAPIHadoopFile(
                outputPath, Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());
    } finally {
        // Stop the context even when the job throws.
        sc.stop();
    }
}
示例4: writeRecords
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Converts the given records to key/value pairs with {@code readsToWritable}
 * and saves them under {@code outpath} using {@code BAMHeaderOutputFormat}.
 *
 * @param records records to write
 * @param header  broadcast header passed through to {@code readsToWritable}
 * @param outpath destination path handed to {@code saveAsNewAPIHadoopFile}
 * @param sc      Spark context supplying the Hadoop configuration for the write
 */
public void writeRecords(JavaRDD<SAMRecord> records, Broadcast<SAMFileHeader> header, String outpath, SparkContext sc) {
    // Build the writable pairs and hand them straight to the Hadoop output format.
    readsToWritable(records, header).saveAsNewAPIHadoopFile(
            outpath,
            SAMRecord.class,
            SAMRecordWritable.class,
            BAMHeaderOutputFormat.class,
            sc.hadoopConfiguration());
}