

Java JavaStreamingContext Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.streaming.api.java.JavaStreamingContext. If you are wondering how exactly JavaStreamingContext is used, or where to find real-world examples of it, the curated class examples below may help.


The JavaStreamingContext class belongs to the org.apache.spark.streaming.api.java package. Fifteen code examples of the class are collected below, sorted by popularity.
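Before diving into the examples, here is a minimal, self-contained sketch of the typical JavaStreamingContext lifecycle: build the context from a SparkConf and a batch interval, declare at least one output operation, then start and block. The socket endpoint is a placeholder assumption for illustration, not taken from any example below.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class MinimalStreamingApp {
    public static void main(String[] args) throws InterruptedException {
        // Build the context from a SparkConf and a 5-second batch interval.
        SparkConf conf = new SparkConf().setAppName("minimal-streaming").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Declare at least one output operation before starting.
        JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999); // placeholder endpoint
        lines.print();

        ssc.start();            // start the computation
        ssc.awaitTermination(); // block until stopped or an error occurs
    }
}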

Example 1: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf()
            .setAppName("kafka-sandbox")
            .setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "localhost:9092");

    JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
            String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

    directKafkaStream.foreachRDD(rdd -> {
        System.out.println("--- New RDD with " + rdd.partitions().size()
                + " partitions and " + rdd.count() + " records");
        rdd.foreach(record -> System.out.println(record._2));
    });

    ssc.start();
    ssc.awaitTermination();
}
 
Developer: aseigneurin, Project: kafka-sandbox, Lines: 25, Source: SparkStringConsumer.java
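Note: this example (like Examples 2, 5, and 9) uses the old spark-streaming-kafka-0-8 direct API (KafkaUtils.createDirectStream with StringDecoder), which was deprecated in Spark 2.3 and removed in Spark 3.0; Example 6 below shows the replacement kafka-0-10 API based on ConsumerStrategies.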

Example 2: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 18, Source: Spark4KafkaNew.java

Example 3: run

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
private void run(CompositeConfiguration conf) {
    // Spark conf
    SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
            .set("spark.serializer", conf.getString("spark.serializer"));
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(conf.getLong("stream.duration")));

    // Twitter4J
    // IMPORTANT: put keys in twitter4J.properties
    Configuration twitterConf = ConfigurationContext.getInstance();
    Authorization twitterAuth = AuthorizationFactory.getInstance(twitterConf);

    // Create twitter stream
    String[] filters = { "#Car" };
    TwitterUtils.createStream(jssc, twitterAuth, filters).print();
    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
 
Developer: ogidogi, Project: laughing-octo-sansa, Lines: 19, Source: TwitterSparkCrawler.java

Example 4: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Streaming");
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    //JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
    //textFileStream processes files line by line, so each XML document would have to sit on a
    //single line to work; the alternative below reads whole files instead.

    JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
    Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
    rddQueue.add(files);
    JavaDStream<String> records = jsc.queueStream(rddQueue);

    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 18, Source: Spark2Streaming.java

Example 5: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
    });
    
    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 25, Source: Spark6BigQuery.java

Example 6: providesKafkaInputStream

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
@Provides
JavaInputDStream<ConsumerRecord<String, RawRating>> providesKafkaInputStream(JavaStreamingContext streamingContext) {
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", JsonDeserializer.class);
    kafkaParams.put("serializedClass", RawRating.class);
    kafkaParams.put("group.id", "rating_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    Collection<String> topics = Arrays.asList("topicA", "topicB");

    return KafkaUtils.createDirectStream(
            streamingContext,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, RawRating>Subscribe(topics, kafkaParams)
    );
}
 
Developer: cosminseceleanu, Project: movie-recommender, Lines: 19, Source: SparkModule.java
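As a follow-up, here is a hedged sketch of how the returned stream might be consumed; the mapping and output below are illustrative assumptions, not the movie-recommender project's actual consumer.

void consumeRatings(JavaInputDStream<ConsumerRecord<String, RawRating>> stream) {
    // Extract the deserialized payload from each Kafka record.
    JavaDStream<RawRating> ratings = stream.map(ConsumerRecord::value);
    ratings.foreachRDD(rdd -> System.out.println("Ratings in batch: " + rdd.count()));
}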

Example 7: checkpoint

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
  Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
  Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
  Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();

  try {
    FileSystem fileSystem =
        rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
    if (!fileSystem.exists(rootCheckpointPath)) {
      fileSystem.mkdirs(rootCheckpointPath);
    }
    if (!fileSystem.exists(sparkCheckpointPath)) {
      fileSystem.mkdirs(sparkCheckpointPath);
    }
    if (!fileSystem.exists(beamCheckpointPath)) {
      fileSystem.mkdirs(beamCheckpointPath);
    }
  } catch (IOException e) {
    throw new RuntimeException("Failed to create checkpoint dir", e);
  }

  jssc.checkpoint(sparkCheckpointPath.toString());
}
 
Developer: apache, Project: beam, Lines: 24, Source: SparkRunnerStreamingContextFactory.java

Example 8: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws Exception {
	System.setProperty("hadoop.home.dir", "E:\\hadoop");

	final String ip = "10.0.75.1";
	final int port = Integer.parseInt("9000");
	final String checkpointDirectory = "E:\\hadoop\\checkpoint";
	// Function to create JavaStreamingContext without any output operations
	// (used to detect the new context)
	Function0<JavaStreamingContext> createContextFunc = new Function0<JavaStreamingContext>() {
		@Override
		public JavaStreamingContext call() {
			return createContext(ip, port, checkpointDirectory);
		}
	};

	JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc);
	ssc.start();
	ssc.awaitTermination();
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 20, Source: WordCountRecoverableEx.java
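The createContext factory referenced by getOrCreate is not shown in this snippet. The sketch below is one plausible shape for it, assuming a socket-based word count and Spark 2.x function signatures; the book's actual implementation may differ.

private static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
    // Runs only when no checkpoint exists yet; otherwise getOrCreate restores from disk.
    SparkConf conf = new SparkConf().setAppName("WordCountRecoverable").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(5));
    ssc.checkpoint(checkpointDirectory); // enable checkpointing so the context is recoverable

    // Illustrative pipeline: count words arriving on the socket.
    JavaPairDStream<String, Integer> counts = ssc.socketTextStream(ip, port)
            .flatMap(line -> Arrays.asList(line.split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey(Integer::sum);
    counts.print();
    return ssc;
}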

Example 9: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark conf.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize parameters
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Fetch data from the Kafka stream
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Parse each line
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// rdd to DataFrame
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Write to Parquet files
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming job
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
Developer: sectong, Project: SparkToParquet, Lines: 52, Source: AppMain.java

Example 10: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args)
{
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");

    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

    counter.print();

    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
 
Developer: arks-api, Project: arks-api, Lines: 30, Source: WordCount.java
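WORDS_EXTRACTOR, WORDS_MAPPER, and WORDS_REDUCER are constants defined elsewhere in the class. The definitions below are a hedged sketch of what they plausibly look like (Spark 2.x function signatures assumed), not the arks-api project's actual code.

// Hypothetical helper definitions (illustrative only).
static final FlatMapFunction<String, String> WORDS_EXTRACTOR =
        line -> Arrays.asList(line.split(" ")).iterator();   // split each line into words
static final PairFunction<String, String, Integer> WORDS_MAPPER =
        word -> new Tuple2<>(word, 1);                       // pair each word with a count of 1
static final Function2<Integer, Integer, Integer> WORDS_REDUCER =
        (a, b) -> a + b;                                     // sum the counts per word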

Example 11: run

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public void run() throws IOException {
  SparkConf conf = new SparkConf();
  conf.setAppName(getAppName());
  conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
  JavaSparkUtil.packProjectJars(conf);
  setupSparkConf(conf);

  JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
  List<JavaDStream<T>> streamsList = getStreamsList(ssc);

  // Union all the streams if there is more than 1 stream
  JavaDStream<T> streams = unionStreams(ssc, streamsList);

  JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
    public Tuple2<String, RowMutation> call(T t) {
      RowMutation rowMutation = convert(t);
      return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
    }
  });

  pairDStream.foreachRDD(getFunction());

  ssc.start();
  ssc.awaitTermination();
}
 
Developer: apache, Project: incubator-blur, Lines: 26, Source: BlurLoadSparkProcessor.java
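getStreamsList and unionStreams are defined elsewhere in the class; the sketch below shows one plausible unionStreams, folding the list into a single DStream with pairwise unions. It is an assumption for illustration, not Blur's actual helper.

// Hypothetical sketch of unionStreams (illustrative only).
static <T> JavaDStream<T> unionStreams(JavaStreamingContext ssc, List<JavaDStream<T>> streams) {
    JavaDStream<T> union = streams.get(0);
    for (int i = 1; i < streams.size(); i++) {
        union = union.union(streams.get(i)); // pairwise union of all input streams
    }
    return union;
}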

Example 12: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {

        String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
        if (messagingServiceHost != null) {
            host = messagingServiceHost;
        }
        LOG.info("host = {}", host);
        String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
        if (messagingServicePort != null) {
            port = Integer.valueOf(messagingServicePort);
        }
        LOG.info("port = {}", port);

        JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, AMQPTemperature::createStreamingContext);

        ssc.start();
        ssc.awaitTermination();
    }
 
Developer: ppatierno, Project: enmasse-iot-demo, Lines: 19, Source: AMQPTemperature.java

Example 13: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException, IOException, JAXBException {
  SparkConf sc = new SparkConf().setAppName("Receiving-KafkaToBQ");

  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {

    JavaPairDStream<String, String> stream = new KafkaInputWithOffsets(
        KAFKA_HOST_PORT, EXAMPLE_TOPIC, ZOOKEEPER_HOST, ZK_PATH).createResumableStream(jsc);

    stream.foreachRDD(IdleStop.create(jsc, 2, "XMLs count: %d\n"));

    stream
        .mapToPair(parseXml())
        .filter(t -> t != null)
        .mapToPair(prepToBq())
        .foreachRDD(BigQueryHelper.outputTo(BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Developer: ciandt-dev, Project: gcp, Lines: 21, Source: Spark8Organized.java

Example 14: BatchUpdateFunction

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
BatchUpdateFunction(Config config,
                    Class<K> keyClass,
                    Class<M> messageClass,
                    Class<? extends Writable> keyWritableClass,
                    Class<? extends Writable> messageWritableClass,
                    String dataDirString,
                    String modelDirString,
                    BatchLayerUpdate<K,M,U> updateInstance,
                    JavaStreamingContext streamingContext) {
  this.keyClass = keyClass;
  this.messageClass = messageClass;
  this.keyWritableClass = keyWritableClass;
  this.messageWritableClass = messageWritableClass;
  this.dataDirString = dataDirString;
  this.modelDirString = modelDirString;
  this.updateBroker = ConfigUtils.getOptionalString(config, "oryx.update-topic.broker");
  this.updateTopic = ConfigUtils.getOptionalString(config, "oryx.update-topic.message.topic");
  this.updateInstance = updateInstance;
  this.sparkContext = streamingContext.sparkContext();
}
 
Developer: oncewang, Project: oryx2, Lines: 21, Source: BatchUpdateFunction.java

Example 15: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the required package/class
public static void main(String[] args) throws InterruptedException {

        // getting AMQP messaging service connection information
        String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
        if (messagingServiceHost != null) {
            host = messagingServiceHost;
        }
        String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
        if (messagingServicePort != null) {
            port = Integer.valueOf(messagingServicePort);
        }
        log.info("AMQP messaging service hostname {}:{}", host, port);

        // getting credentials for authentication
        username = System.getenv("SPARK_DRIVER_USERNAME");
        password = System.getenv("SPARK_DRIVER_PASSWORD");
        log.info("Credentials {}/{}", username, password);

        JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, TemperatureAnalyzer::createStreamingContext);

        ssc.start();
        ssc.awaitTermination();
    }
 
Developer: EnMasseProject, Project: enmasse-workshop, Lines: 24, Source: TemperatureAnalyzer.java


Note: the org.apache.spark.streaming.api.java.JavaStreamingContext examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects and remain the copyright of their original authors; consult each project's license before distributing or reusing them.