This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaStreamingContext. If you are wondering what exactly JavaStreamingContext does, how to use it, or want to see it in real code, the curated class examples below may help.
The JavaStreamingContext class belongs to the org.apache.spark.streaming.api.java package. 15 code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
            .setAppName("kafka-sandbox")
            .setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "localhost:9092");
    JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
            String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

    directKafkaStream.foreachRDD(rdd -> {
        System.out.println("--- New RDD with " + rdd.partitions().size()
                + " partitions and " + rdd.count() + " records");
        rdd.foreach(record -> System.out.println(record._2));
    });

    ssc.start();
    ssc.awaitTermination();
}
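The example above prints each record inside foreachRDD. As a minimal variant (a sketch, not part of the original example), the message values can also be pulled out as their own DStream and printed with the built-in print() output operation:

// Sketch: extract the Kafka message values as a DStream of strings
JavaDStream<String> values = directKafkaStream.map(t -> t._2());
values.print(); // prints the first 10 elements of each batch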
Example 2: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
    SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
    try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
        JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
                jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
                Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
                Collections.singleton(EXAMPLE_TOPIC));

        JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
        records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

        jsc.start();
        jsc.awaitTermination();
    }
}
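KAFKA_HOST_PORT and EXAMPLE_TOPIC are constants defined elsewhere in the original project. Plausible placeholder definitions, shown only as assumptions to make the snippet self-contained:

private static final String KAFKA_HOST_PORT = "localhost:9092"; // assumed broker address
private static final String EXAMPLE_TOPIC = "example_topic";    // assumed topic name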
Example 3: run
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
private void run(CompositeConfiguration conf) {
    // Spark conf
    SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
            .set("spark.serializer", conf.getString("spark.serializer"));
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf,
            Durations.seconds(conf.getLong("stream.duration")));

    // Twitter4J
    // IMPORTANT: put keys in twitter4j.properties
    Configuration twitterConf = ConfigurationContext.getInstance();
    Authorization twitterAuth = AuthorizationFactory.getInstance(twitterConf);

    // Create the Twitter stream
    String[] filters = { "#Car" };
    TwitterUtils.createStream(jssc, twitterAuth, filters).print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
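If you would rather not rely on a twitter4j.properties file, Twitter4J also accepts the OAuth keys programmatically through its ConfigurationBuilder; a sketch with placeholder credentials:

// Sketch: build the Twitter4J configuration in code instead of a properties file
Configuration twitterConf = new ConfigurationBuilder()
        .setOAuthConsumerKey("YOUR_CONSUMER_KEY")
        .setOAuthConsumerSecret("YOUR_CONSUMER_SECRET")
        .setOAuthAccessToken("YOUR_ACCESS_TOKEN")
        .setOAuthAccessTokenSecret("YOUR_ACCESS_TOKEN_SECRET")
        .build();
Authorization twitterAuth = AuthorizationFactory.getInstance(twitterConf);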
Example 4: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
    SparkConf sc = new SparkConf().setAppName("POC-Streaming");
    try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
        //JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
        // textFileStream processes files line by line, so each XML has to be on a single line to work; alternative below
        JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
        Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
        rddQueue.add(files);
        JavaDStream<String> records = jsc.queueStream(rddQueue);
        records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

        jsc.start();
        jsc.awaitTermination();
    }
}
Example 5: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
    SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
    try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
        JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
                jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
                Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

        Configuration conf = new Configuration();
        BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
        conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

        JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
        records.foreachRDD(rdd -> {
            System.out.printf("Amount of XMLs: %d\n", rdd.count());
            long time = System.currentTimeMillis();
            rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
            System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis() - time) / 1000f);
        });

        jsc.start();
        jsc.awaitTermination();
    }
}
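BQ_EXAMPLE_TABLE and BQ_EXAMPLE_SCHEMA are project constants not shown here. With the BigQuery Hadoop connector, the table id is typically a fully qualified "project:dataset.table" string and the schema a JSON array of field descriptors; the values below are illustrative assumptions only:

private static final String BQ_EXAMPLE_TABLE = "my-project:example_dataset.xml_records"; // assumed
private static final String BQ_EXAMPLE_SCHEMA = "[{\"name\": \"xml\", \"type\": \"STRING\"}]"; // assumed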
Example 6: providesKafkaInputStream
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
@Provides
JavaInputDStream<ConsumerRecord<String, RawRating>> providesKafkaInputStream(JavaStreamingContext streamingContext) {
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", JsonDeserializer.class);
    kafkaParams.put("serializedClass", RawRating.class);
    kafkaParams.put("group.id", "rating_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    Collection<String> topics = Arrays.asList("topicA", "topicB");
    return KafkaUtils.createDirectStream(
            streamingContext,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, RawRating>Subscribe(topics, kafkaParams)
    );
}
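A hedged sketch of how the provided stream might be consumed by an injection client. Extracting the deserialized RawRating payloads is plain Spark/Kafka API; the variable name kafkaStream for the injected stream is an assumption:

// Sketch: map each ConsumerRecord to its deserialized value
JavaDStream<RawRating> ratings = kafkaStream.map(record -> record.value());
ratings.print();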
Example 7: checkpoint
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
    Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
    Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
    Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();
    try {
        FileSystem fileSystem =
                rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
        if (!fileSystem.exists(rootCheckpointPath)) {
            fileSystem.mkdirs(rootCheckpointPath);
        }
        if (!fileSystem.exists(sparkCheckpointPath)) {
            fileSystem.mkdirs(sparkCheckpointPath);
        }
        if (!fileSystem.exists(beamCheckpointPath)) {
            fileSystem.mkdirs(beamCheckpointPath);
        }
    } catch (IOException e) {
        throw new RuntimeException("Failed to create checkpoint dir", e);
    }
    jssc.checkpoint(sparkCheckpointPath.toString());
}
Example 8: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws Exception {
    System.setProperty("hadoop.home.dir", "E:\\hadoop");
    final String ip = "10.0.75.1";
    final int port = Integer.parseInt("9000");
    final String checkpointDirectory = "E:\\hadoop\\checkpoint";

    // Function to create JavaStreamingContext without any output operations
    // (used to detect the new context)
    Function0<JavaStreamingContext> createContextFunc = new Function0<JavaStreamingContext>() {
        @Override
        public JavaStreamingContext call() {
            return createContext(ip, port, checkpointDirectory);
        }
    };

    JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc);
    ssc.start();
    ssc.awaitTermination();
}
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 20, Source: WordCountRecoverableEx.java
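createContext is defined elsewhere in WordCountRecoverableEx. A minimal sketch of such a factory, assuming a socket-based word count; the body below illustrates the getOrCreate pattern and is not the book's exact code:

private static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
    SparkConf conf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));
    ssc.checkpoint(checkpointDirectory); // enables recovery via getOrCreate
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(ip, port);
    lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey(Integer::sum)
            .print();
    return ssc;
}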
Example 9: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws IOException {
    Flags.setFromCommandLineArgs(THE_OPTIONS, args);

    // Initialize the Spark conf
    SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
    SQLContext sqlContext = new SQLContext(sc);

    // Initialize parameters
    HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
    HashMap<String, String> kafkaParams = new HashMap<String, String>();
    kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

    // Get data from the Kafka stream
    JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
            StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);
    JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
        private static final long serialVersionUID = 5266880065425088203L;

        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
        List<ApacheAccessLog> list = new ArrayList<>();
        try {
            // Map each line; lines that fail to parse are skipped
            list.add(ApacheAccessLog.parseFromLogLine(line));
            return list;
        } catch (RuntimeException e) {
            return list;
        }
    }).cache();

    accessLogsDStream.foreachRDD(rdd -> {
        // RDD to DataFrame
        DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
        // Write out as Parquet files
        df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());
        return null;
    });

    // Start the streaming computation
    jssc.start(); // start the computation
    jssc.awaitTermination(); // wait for the computation to terminate
}
Example 10: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args)
{
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.print();

    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
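WORDS_EXTRACTOR, WORDS_MAPPER and WORDS_REDUCER are constants defined elsewhere in the class. Hedged sketches of what they plausibly look like, assuming Spark 2.x signatures (where FlatMapFunction returns an Iterator):

private static final FlatMapFunction<String, String> WORDS_EXTRACTOR =
        line -> Arrays.asList(line.split(" ")).iterator(); // split each line into words
private static final PairFunction<String, String, Integer> WORDS_MAPPER =
        word -> new Tuple2<>(word, 1);                     // pair each word with a count of 1
private static final Function2<Integer, Integer, Integer> WORDS_REDUCER =
        (a, b) -> a + b;                                   // sum the counts per word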
Example 11: run
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public void run() throws IOException {
    SparkConf conf = new SparkConf();
    conf.setAppName(getAppName());
    conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
    JavaSparkUtil.packProjectJars(conf);
    setupSparkConf(conf);

    JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
    List<JavaDStream<T>> streamsList = getStreamsList(ssc);

    // Union all the streams if there is more than one stream
    JavaDStream<T> streams = unionStreams(ssc, streamsList);

    JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
        public Tuple2<String, RowMutation> call(T t) {
            RowMutation rowMutation = convert(t);
            return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
        }
    });

    pairDStream.foreachRDD(getFunction());

    ssc.start();
    ssc.awaitTermination();
}
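unionStreams is a helper from the surrounding class. A plausible implementation, assuming it simply unions the list when it holds more than one stream (JavaStreamingContext.union takes a first stream plus the remaining streams in the older Spark API this code targets):

private JavaDStream<T> unionStreams(JavaStreamingContext ssc, List<JavaDStream<T>> streamsList) {
    if (streamsList.size() == 1) {
        return streamsList.get(0); // nothing to union
    }
    return ssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size()));
}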
Example 12: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
    String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
    if (messagingServiceHost != null) {
        host = messagingServiceHost;
    }
    LOG.info("host = {}", host);

    String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
    if (messagingServicePort != null) {
        port = Integer.valueOf(messagingServicePort);
    }
    LOG.info("port = {}", port);

    JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, AMQPTemperature::createStreamingContext);
    ssc.start();
    ssc.awaitTermination();
}
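createStreamingContext is the factory that getOrCreate falls back to when no checkpoint exists; its body is not shown here. A minimal sketch of the usual shape, with the AMQP input wiring elided because it depends on the project's connector:

private static JavaStreamingContext createStreamingContext() {
    SparkConf conf = new SparkConf().setAppName("AMQPTemperature");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));
    ssc.checkpoint(CHECKPOINT_DIR); // required so getOrCreate can recover from the checkpoint
    // ... create the AMQP input stream against host/port and define the computation here ...
    return ssc;
}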
Example 13: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException, JAXBException {
    SparkConf sc = new SparkConf().setAppName("Receiving-KafkaToBQ");
    try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
        JavaPairDStream<String, String> stream = new KafkaInputWithOffsets(
                KAFKA_HOST_PORT, EXAMPLE_TOPIC, ZOOKEEPER_HOST, ZK_PATH).createResumableStream(jsc);

        stream.foreachRDD(IdleStop.create(jsc, 2, "XMLs count: %d\n"));

        stream
                .mapToPair(parseXml())
                .filter(t -> t != null)
                .mapToPair(prepToBq())
                .foreachRDD(BigQueryHelper.outputTo(BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA));

        jsc.start();
        jsc.awaitTermination();
    }
}
Example 14: BatchUpdateFunction
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
BatchUpdateFunction(Config config,
                    Class<K> keyClass,
                    Class<M> messageClass,
                    Class<? extends Writable> keyWritableClass,
                    Class<? extends Writable> messageWritableClass,
                    String dataDirString,
                    String modelDirString,
                    BatchLayerUpdate<K,M,U> updateInstance,
                    JavaStreamingContext streamingContext) {
    this.keyClass = keyClass;
    this.messageClass = messageClass;
    this.keyWritableClass = keyWritableClass;
    this.messageWritableClass = messageWritableClass;
    this.dataDirString = dataDirString;
    this.modelDirString = modelDirString;
    this.updateBroker = ConfigUtils.getOptionalString(config, "oryx.update-topic.broker");
    this.updateTopic = ConfigUtils.getOptionalString(config, "oryx.update-topic.message.topic");
    this.updateInstance = updateInstance;
    this.sparkContext = streamingContext.sparkContext();
}
Example 15: main
import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
    // get the AMQP messaging service connection information
    String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
    if (messagingServiceHost != null) {
        host = messagingServiceHost;
    }
    String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
    if (messagingServicePort != null) {
        port = Integer.valueOf(messagingServicePort);
    }
    log.info("AMQP messaging service hostname {}:{}", host, port);

    // get the credentials for authentication
    username = System.getenv("SPARK_DRIVER_USERNAME");
    password = System.getenv("SPARK_DRIVER_PASSWORD");
    log.info("Credentials {}/{}", username, password);

    JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, TemperatureAnalyzer::createStreamingContext);
    ssc.start();
    ssc.awaitTermination();
}