This article collects and summarizes typical usages of the Java method org.apache.spark.streaming.api.java.JavaDStream.foreachRDD. If you are wondering what JavaDStream.foreachRDD does, how to use it, or want to see real examples, the curated method samples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.
The following shows 15 code examples of JavaDStream.foreachRDD, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
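Before the collected examples, here is a minimal, self-contained sketch of the foreachRDD call pattern. It is illustrative only: the local master, the socket source, and the host/port are placeholder assumptions rather than details taken from the examples below. foreachRDD hands each micro-batch to driver-side code as a JavaRDD, on which ordinary RDD operations can be applied.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class ForeachRDDSketch {
  public static void main(String[] args) throws InterruptedException {
    // Hypothetical local setup; a real cluster configuration works the same way.
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("ForeachRDDSketch");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(2));
    // Placeholder source: lines read from a local socket (e.g. started with `nc -lk 9999`).
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
    // foreachRDD runs once per micro-batch; the lambda receives that batch as a JavaRDD.
    lines.foreachRDD(rdd -> System.out.println("Records in this batch: " + rdd.count()));
    jssc.start();
    jssc.awaitTermination();
  }
}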
Example 1: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));
    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
    jsc.start();
    jsc.awaitTermination();
  }
}
Example 2: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Streaming");
  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    //JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
    // textFileStream processes files line by line, so each XML record would have to fit on a single line;
    // the queueStream alternative below avoids that limitation.
    JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
    Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
    rddQueue.add(files);
    JavaDStream<String> records = jsc.queueStream(rddQueue);
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
    jsc.start();
    jsc.awaitTermination();
  }
}
Example 3: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));
    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());
    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis() - time) / 1000f);
    });
    jsc.start();
    jsc.awaitTermination();
  }
}
Example 4: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws IOException {
  Flags.setFromCommandLineArgs(THE_OPTIONS, args);
  // Initialize the Spark configuration.
  SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
  SQLContext sqlContext = new SQLContext(sc);
  // Initialize parameters.
  HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
  HashMap<String, String> kafkaParams = new HashMap<String, String>();
  kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());
  // Fetch data from the Kafka stream.
  JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
      StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
  JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
    private static final long serialVersionUID = 5266880065425088203L;
    public String call(Tuple2<String, String> tuple2) {
      return tuple2._2();
    }
  });
  JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
    List<ApacheAccessLog> list = new ArrayList<>();
    try {
      // Parse each line; lines that fail to parse are skipped.
      list.add(ApacheAccessLog.parseFromLogLine(line));
      return list;
    } catch (RuntimeException e) {
      return list;
    }
  }).cache();
  accessLogsDStream.foreachRDD(rdd -> {
    // Convert the RDD to a DataFrame.
    DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
    // Write out as Parquet files.
    df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());
    return null;
  });
  // Start the streaming job.
  jssc.start(); // start the computation
  jssc.awaitTermination(); // wait for termination
}
Example 5: run
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
@Override
public void run() {
  JMetalLogger.logger.info("Run method in the streaming data source invoked");
  JMetalLogger.logger.info("Directory: " + directoryName);
  JavaDStream<Integer> time = streamingContext
      .textFileStream(directoryName)
      .map(line -> Integer.parseInt(line));
  time.foreachRDD(numbers -> {
    List<Integer> numberList = numbers.collect();
    for (Integer number : numberList) {
      System.out.println(number);
      observable.setChanged();
      observable.notifyObservers(new SingleObservedData<Integer>(number));
    }
  });
}
Example 6: start
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
private void start() {
  // Create a local StreamingContext with two working threads and a batch interval of 5 seconds.
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));
  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
  msgDataStream.print();
  // Create JavaRDD<Row> from each batch
  msgDataStream.foreachRDD(new RowProcessor());
  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
Author: jgperrin | Project: net.jgp.labs.spark | Lines of code: 21 | Source file: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java
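The RowProcessor class used in Example 6 is not shown on this page. Purely to illustrate the shape foreachRDD expects there, here is a hedged sketch of such a handler, assuming Spark 2.x (SparkSession, Dataset<Row>); the actual class in net.jgp.labs.spark may be implemented differently.

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Hypothetical handler with the signature foreachRDD accepts in Example 6.
public class RowProcessorSketch implements VoidFunction<JavaRDD<String>> {
  private static final long serialVersionUID = 1L;

  @Override
  public void call(JavaRDD<String> rdd) throws Exception {
    if (rdd.isEmpty()) {
      return; // nothing arrived in this micro-batch
    }
    // Reuse (or create) a SparkSession from the RDD's context and turn the batch into a Dataset<Row>.
    SparkSession spark = SparkSession.builder().config(rdd.context().getConf()).getOrCreate();
    Dataset<Row> df = spark.createDataset(rdd.rdd(), Encoders.STRING()).toDF("value");
    df.show(5);
  }
}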
Example 7: publishToNats
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * @param stream the Spark stream to publish to NATS
 * @param dataEncoder the function used to encode the Spark stream records into the NATS message payloads
 */
public <V extends Object> void publishToNats(final JavaDStream<V> stream, final Function<V, byte[]> dataEncoder) {
  logger.trace("publishToNats(JavaDStream<String> stream)");
  stream.foreachRDD((VoidFunction<JavaRDD<V>>) rdd -> {
    logger.trace("stream.foreachRDD");
    rdd.foreachPartitionAsync(objects -> {
      logger.trace("rdd.foreachPartition");
      final SparkToNatsConnector<?> connector = getConnector();
      while (objects.hasNext()) {
        final V obj = objects.next();
        logger.trace("Will publish {}", obj);
        connector.publishToNats(dataEncoder.apply(obj));
      }
      returnConnector(connector); // return to the pool for future reuse
    });
  });
}
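A hedged usage sketch of the method above: `pool` stands for an already-constructed instance of the enclosing connector-pool class and `lines` for an existing JavaDStream<String>; both names are placeholders, and the concrete Function type of dataEncoder is whatever the connector library declares (its single abstract method is apply, as used above).

// Hypothetical call site; `pool` and `lines` are assumed to exist already.
pool.publishToNats(lines, str -> str.getBytes(java.nio.charset.StandardCharsets.UTF_8));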
Example 8: write
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Writes the content of the stream to the Kafka topic
 * behind this producer.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value = "SE_INNER_CLASS", justification = "Uses state from outer class.")
public void write(JavaDStream<T> stream) {
  stream.foreachRDD(new Function<JavaRDD<T>, Void>() {
    @Override
    public Void call(JavaRDD<T> rdd) throws Exception {
      write(rdd);
      return null;
    }
  });
}
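On Spark 1.6 or later, the same method could target the VoidFunction overload of foreachRDD (org.apache.spark.api.java.function.VoidFunction), which drops the explicit Void return value; a minimal sketch, reusing the outer-class write(JavaRDD<T>) helper called above:

public void write(JavaDStream<T> stream) {
  // VoidFunction overload (Spark 1.6+): no `return null` boilerplate needed.
  stream.foreachRDD((VoidFunction<JavaRDD<T>>) rdd -> write(rdd));
}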
Example 9: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) {
  // Windows-specific property, needed if Hadoop is not installed or HADOOP_HOME is not set
  System.setProperty("hadoop.home.dir", "E:\\hadoop");
  //Logger rootLogger = LogManager.getRootLogger();
  //rootLogger.setLevel(Level.WARN);
  SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
  String inputDirectory = "E:\\hadoop\\streamFolder\\";
  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
  // streamingContext.checkpoint("E:\\hadoop\\checkpoint");
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.WARN);
  JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
  streamfile.print();
  streamfile.foreachRDD(rdd -> rdd.foreach(x -> System.out.println(x)));
  JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
  streamedFile.print();
  streamingContext.start();
  try {
    streamingContext.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
Example 10: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka");
  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    JavaPairReceiverInputDStream<String, String> stream = KafkaUtils.createStream(
        jsc, ZK_HOST_PORT, "a_group_id", Collections.singletonMap(EXAMPLE_TOPIC, 1));
    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));
    jsc.start();
    jsc.awaitTermination();
  }
}
Example 11: validateTheReceptionOfMessages
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
protected void validateTheReceptionOfMessages(JavaStreamingContext ssc,
    JavaReceiverInputDStream<String> stream) throws InterruptedException {
  JavaDStream<String> messages = stream.repartition(3);
  ExecutorService executor = Executors.newFixedThreadPool(6);
  final int nbOfMessages = 5;
  NatsPublisher np = getNatsPublisher(nbOfMessages);
  if (logger.isDebugEnabled()) {
    messages.print();
  }
  messages.foreachRDD(new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = 1L;
    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
      logger.debug("RDD received: {}", rdd.collect());
      final long count = rdd.count();
      if ((count != 0) && (count != nbOfMessages)) {
        rightNumber = false;
        logger.error("The number of messages received should have been {} instead of {}.", nbOfMessages, count);
      }
      TOTAL_COUNT.getAndAdd((int) count);
      atLeastSomeData = atLeastSomeData || (count > 0);
      for (String str : rdd.collect()) {
        if (!str.startsWith(NatsPublisher.NATS_PAYLOAD)) {
          payload = str;
        }
      }
    }
  });
  closeTheValidation(ssc, executor, nbOfMessages, np);
}
Example 12: validateTheReceptionOfIntegerMessages
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
protected void validateTheReceptionOfIntegerMessages(JavaStreamingContext ssc,
    JavaReceiverInputDStream<Integer> stream) throws InterruptedException {
  JavaDStream<Integer> messages = stream.repartition(3);
  ExecutorService executor = Executors.newFixedThreadPool(6);
  final int nbOfMessages = 5;
  NatsPublisher np = getNatsPublisher(nbOfMessages);
  if (logger.isDebugEnabled()) {
    messages.print();
  }
  messages.foreachRDD(new VoidFunction<JavaRDD<Integer>>() {
    private static final long serialVersionUID = 1L;
    @Override
    public void call(JavaRDD<Integer> rdd) throws Exception {
      logger.debug("RDD received: {}", rdd.collect());
      final long count = rdd.count();
      if ((count != 0) && (count != nbOfMessages)) {
        rightNumber = false;
        logger.error("The number of messages received should have been {} instead of {}.", nbOfMessages, count);
      }
      TOTAL_COUNT.getAndAdd((int) count);
      atLeastSomeData = atLeastSomeData || (count > 0);
      for (Integer value : rdd.collect()) {
        if (value < NatsPublisher.NATS_PAYLOAD_INT) {
          payload = value.toString();
        }
      }
    }
  });
  closeTheValidation(ssc, executor, nbOfMessages, np);
}
Example 13: save
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Save all RDDs in the given DStream to the given view.
 * @param dstream
 * @param view
 */
public static <T> void save(JavaDStream<T> dstream, final View<T> view) {
  final String uri = view.getUri().toString();
  dstream.foreachRDD(new Function2<JavaRDD<T>, Time, Void>() {
    @Override
    public Void call(JavaRDD<T> rdd, Time time) throws Exception {
      save(rdd, uri);
      return null;
    }
  });
}
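On Spark 1.6 or later, the same save could target the VoidFunction2 overload of foreachRDD (org.apache.spark.api.java.function.VoidFunction2), which still receives the batch Time but needs no Void return; a minimal sketch, reusing the save(JavaRDD<T>, String) helper called above:

public static <T> void save(JavaDStream<T> dstream, final View<T> view) {
  final String uri = view.getUri().toString();
  // VoidFunction2 overload (Spark 1.6+): per-batch RDD plus batch time, no `return null` needed.
  dstream.foreachRDD((VoidFunction2<JavaRDD<T>, Time>) (rdd, time) -> save(rdd, uri));
}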
Example 14: configureDataContext
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
private void configureDataContext(JavaStreamingContext context) {
  Map<String, Integer> baseTopicMap = new HashMap<>();
  configurationContext.getDataTopics().forEach(dataTopic -> baseTopicMap.put(dataTopic, 1));
  kafkaTopicService.createTopicsIfNotExist(configurationContext.getDataTopics(), configurationContext
      .getKafkaReplicationFactor(), configurationContext.getKafkaPartitions());
  HashMap<String, String> kafkaParams = new HashMap<>();
  kafkaParams.put("zookeeper.connect", configurationContext.getZookeeperHostsQuorumWithPath());
  kafkaParams.put("group.id", configurationContext.getGroupId());
  /*
   * groupId must be the cluster groupId. Kafka assigns each partition of a topic to one, and only one, consumer of
   * the group.
   * Decision topics have only one partition (by default), so if we have two or more decision instances (consumers)
   * reading the same topic with the same groupId, only one instance will be able to read from the topic.
   */
  JavaPairDStream<String, byte[]> messages = KafkaUtils.createStream(context, String.class, byte[].class,
      kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, kafkaParams, baseTopicMap,
      StorageLevel.MEMORY_AND_DISK_SER());
  AvroDeserializeMessageFunction avroDeserializeMessageFunction = new AvroDeserializeMessageFunction();
  JavaDStream<StratioStreamingMessage> insertRequests = messages.filter(
      new FilterAvroMessagesByOperationFunction(STREAM_OPERATIONS.MANIPULATION.INSERT))
      .map(avroDeserializeMessageFunction);
  InsertIntoStreamFunction insertIntoStreamFunction = new InsertIntoStreamFunction(streamOperationService,
      configurationContext.getZookeeperHostsQuorum());
  insertRequests.foreachRDD(insertIntoStreamFunction);
}
Example 15: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) {
  Logger.getLogger("org").setLevel(Level.WARN);
  Logger.getLogger("akka").setLevel(Level.WARN);
  SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("StreamingKafka101");
  sparkConf.setJars(new String[] { "target\\TestProjects-1.0-SNAPSHOT.jar" });
  //sparkConf.setExecutorEnv("executor-memory", "8G");
  //sparkConf.setExecutorEnv("spark.executor.memory", "8G");
  sparkConf.set("spark.executor.memory", "4G");
  //sparkConf.set("executor-memory", "8G");
  int duration = 2;
  if (args.length > 0) {
    try {
      duration = Integer.parseInt(args[0]);
      System.out.println("duration changed to " + duration);
    } catch (Exception e) {
      System.out.println("Duration reset to defaults");
    }
  }
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
  Map<String, Integer> topicMap = new HashMap<String, Integer>();
  topicMap.put("loadtest", 4);
  JavaPairReceiverInputDStream<String, String> kafkaStream = KafkaUtils.createStream(ssc, "10.204.100.172:2182", "kafka-group1", topicMap);
  JavaDStream<String> lines = kafkaStream.map(new Function<Tuple2<String, String>, String>() {
    @Override
    public String call(Tuple2<String, String> tuple2) {
      return tuple2._2();
    }
  });
  lines.foreachRDD(new Function<JavaRDD<String>, Void>() {
    @Override
    public Void call(JavaRDD<String> rdd) throws Exception {
      System.out.println(new Date() + " Total records read: " + rdd.count());
      return null;
    }
  });
  ssc.start();
  ssc.awaitTermination();
}