

Java JavaStreamingContext.textFileStream Method Code Examples

This article collects typical usages of the Java method org.apache.spark.streaming.api.java.JavaStreamingContext.textFileStream, gathered from open-source projects. If you are wondering what JavaStreamingContext.textFileStream does, how to call it, or what real code that uses it looks like, the selected examples below should help. You can also explore the other usage examples for org.apache.spark.streaming.api.java.JavaStreamingContext.


Eleven code examples of the JavaStreamingContext.textFileStream method are shown below, sorted by popularity.
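Before diving in, a note on semantics: textFileStream monitors an HDFS-compatible directory and emits the lines of any new text file that appears in it; files already present when the context starts are ignored, so files should be written elsewhere and moved into the directory atomically. Below is a minimal sketch of the core pattern, assuming Spark 2.x (the directory path and batch interval are placeholders):

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class TextFileStreamSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("TextFileStreamSketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // Each batch contains the lines of files that appeared during that interval
        JavaDStream<String> lines = jssc.textFileStream("/tmp/stream-input"); // placeholder path
        lines.print();

        jssc.start();
        jssc.awaitTermination();
    }
}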

Example 1: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args)
{
	SparkConf conf = new SparkConf();
	conf.setAppName("Wordcount Background");
	conf.setMaster("local");

	// 15-second batch interval
	JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

	// Word count over new text files appearing in the watched directory
	JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
	JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
	JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
	JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

	counter.print();

	ssc.start();
	ssc.awaitTermination();

	/* Batch (non-streaming) variant kept commented out by the original author:
	JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
	JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
	JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
	JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
	counter.saveAsTextFile("/home/rahul/Desktop/wc");
	context.close(); */
}
 
Developer: arks-api, Project: arks-api, Lines: 30, Source: WordCount.java
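The constants WORDS_EXTRACTOR, WORDS_MAPPER, and WORDS_REDUCER are defined elsewhere in WordCount.java and not shown in this snippet. A plausible sketch of what they might look like, assuming Spark 2.x function interfaces (where FlatMapFunction.call returns an Iterator) and whitespace tokenization:

// Hypothetical definitions; assumes imports from org.apache.spark.api.java.function,
// plus scala.Tuple2 and java.util.Arrays
private static final FlatMapFunction<String, String> WORDS_EXTRACTOR =
        line -> Arrays.asList(line.split(" ")).iterator();

private static final PairFunction<String, String, Integer> WORDS_MAPPER =
        word -> new Tuple2<>(word, 1);

private static final Function2<Integer, Integer, Integer> WORDS_REDUCER =
        (a, b) -> a + b;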

Example 2: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
private void start() {
	// Create a local StreamingContext with two working threads and a batch interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
	msgDataStream.print();

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 18, Source: StreamingIngestionFileSystemTextFileApp.java

Example 3: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
private void start() {
	// Create a local StreamingContext with two working threads and a batch interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());

	msgDataStream.print();
	// Create JavaRDD<Row>
	msgDataStream.foreachRDD(new RowProcessor());	

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 21, Source: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java

Example 4: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) {
	// Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
	System.setProperty("hadoop.home.dir", "E:\\hadoop");

	SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
	String inputDirectory = "E:\\hadoop\\streamFolder\\";

	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
	// streamingContext.checkpoint("E:\\hadoop\\checkpoint");

	Logger rootLogger = LogManager.getRootLogger();
	rootLogger.setLevel(Level.WARN);

	// Line-oriented stream over new files in the directory
	JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
	streamfile.print();
	streamfile.foreachRDD(rdd -> rdd.foreach(x -> System.out.println(x)));

	// Lower-level variant: fileStream with explicit Hadoop key/value/InputFormat classes
	JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
	streamedFile.print();

	streamingContext.start();

	try {
		streamingContext.awaitTermination();
	} catch (InterruptedException e) {
		e.printStackTrace();
	}
}
 
Developer: PacktPublishing, Project: Apache-Spark-2x-for-Java-Developers, Lines: 33, Source: FileStreamingEx.java
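Example 4 also calls fileStream, the generic method behind textFileStream, which requires the Hadoop key, value, and InputFormat classes to be spelled out. The snippet therefore depends on imports along these lines (a sketch, not the file's actual import list; fileStream expects the new-API InputFormat from org.apache.hadoop.mapreduce):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

// Values arrive as Hadoop Text; a typical follow-up maps them to plain strings
JavaDStream<String> asStrings = streamedFile.map(pair -> pair._2().toString());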

Example 5: getJavaPairDStream

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static JavaPairDStream<String, String> getJavaPairDStream(final File tempDir, final JavaStreamingContext ssc, final String subject1) {
	final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath());
	JavaPairDStream<String, String> keyValues = lines.mapToPair(
			(PairFunction<String, String, String>) str -> new Tuple2<String, String>(subject1 + "." + str, str));
	return keyValues;
}
 
Developer: Logimethods, Project: nats-connector-spark, Lines: 8, Source: UnitTestUtilities.java
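A hypothetical call site for this test helper (the temporary directory, app name, and subject value below are made up for illustration):

// In a test: files dropped into tempDir become ("subject.<line>", "<line>") pairs
File tempDir = java.nio.file.Files.createTempDirectory("stream-test").toFile();
JavaStreamingContext ssc = new JavaStreamingContext("local[2]", "UnitTest", Durations.seconds(1));
JavaPairDStream<String, String> keyValues = UnitTestUtilities.getJavaPairDStream(tempDir, ssc, "subject");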

Example 6: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) {

    // Create the context with a 10-second batch interval
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("Streaming102");
    //SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("Streaming102");
    sparkConf.setJars(new String[] { "target\\original-TestProjects-1.0-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));

    String folder = "./stream/";
    if(args.length == 1){
    	folder = args[0];
    }

    JavaDStream<String> lines = ssc.textFileStream(folder);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
    	  System.out.println(x);
    	  return Lists.newArrayList(SPACE.split(x));
      }
    });
    
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
 
Developer: atulsm, Project: Test_Projects, Lines: 40, Source: Streaming101.java

Example 7: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) throws Exception {
    Logger.getLogger("org").setLevel(Level.WARN);
    Logger.getLogger("akka").setLevel(Level.WARN);

    final Pattern SPACE = Pattern.compile(" ");

    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
    lines.print();

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    words.foreachRDD(
            new VoidFunction2<JavaRDD<String>, Time>() {
                @Override
                public void call(JavaRDD<String> rdd, Time time) {

                    // Get the singleton instance of SQLContext
                    SQLContext sqlContext = SQLContext.getOrCreate(rdd.context());

                    // Convert RDD[String] to RDD[case class] to Dataset
                    JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                        public JavaRecord call(String word) {
                            JavaRecord record = new JavaRecord();
                            record.setWord(word);
                            return record;
                        }
                    });
                    Dataset<Row> wordsDataset = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

                    // Register as table
                    wordsDataset.registerTempTable("words");

                    // Do word count on table using SQL and print it
                    Dataset<Row> wordCountsDataset =
                            sqlContext.sql("select word, count(*) as total from words group by word");
                    wordCountsDataset.show();
                }
            }
    );


    ssc.start();
    ssc.awaitTermination();

}
 
Developer: knoldus, Project: Sparkathon, Lines: 54, Source: SQLonStreams.java
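One caveat about example 7: registerTempTable has been deprecated since Spark 2.0. On current versions the equivalent call, with the same semantics, is:

wordsDataset.createOrReplaceTempView("words");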

Example 8: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) throws Exception {

        final Pattern SPACE = Pattern.compile(" ");

        SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

        JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
        lines.print();

        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String x) {
                return Lists.newArrayList(SPACE.split(x)).iterator();
            }
        });

        JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(
                new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<String, Integer>(s, 1);
                    }
                });

        wordsDstream.print();

        Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        };

        JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(reduceFunc, Durations.seconds(30), Durations.seconds(10));

        windowedWordCounts.print();


        ssc.start();
        ssc.awaitTermination();

    }
 
Developer: knoldus, Project: Sparkathon, Lines: 44, Source: Windowstream.java
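A note on example 8: the window length (30 seconds) and slide interval (10 seconds) must both be multiples of the batch interval, which they are here. There is also a more efficient reduceByKeyAndWindow overload that takes an inverse reduce function and only processes the data entering and leaving the window; it requires checkpointing to be enabled. A sketch of that variant (the checkpoint directory is a placeholder):

ssc.checkpoint("/tmp/checkpoint"); // required by the inverse-function overload
JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(
        (i1, i2) -> i1 + i2,   // add counts entering the window
        (i1, i2) -> i1 - i2,   // subtract counts leaving the window
        Durations.seconds(30), Durations.seconds(10));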

Example 9: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
private void start() {
	// Create a local StreamingContext with two working threads and a batch interval of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());

	msgDataStream.print();
	// Create JavaRDD<Row>
	msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
		private static final long serialVersionUID = -590010339928376829L;

		@Override
		public void call(JavaRDD<String> rdd) {
			JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
				private static final long serialVersionUID = 5167089361335095997L;

				@Override
				public Row call(String msg) {
					Row row = RowFactory.create(msg);
					return row;
				}
			});
			// Create Schema
			StructType schema = DataTypes.createStructType(
					new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });
			
			// Get Spark 2.0 session
			SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
			Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
			msgDataFrame.show();
		}
	});

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Developer: jgperrin, Project: net.jgp.labs.spark, Lines: 44, Source: StreamingIngestionFileSystemTextFileToDataframeApp.java
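JavaSparkSessionSingleton is referenced here but not shown; the usual pattern, following Spark's official streaming examples, is a lazily initialized holder along these lines:

class JavaSparkSessionSingleton {
	private static transient SparkSession instance = null;

	public static SparkSession getInstance(SparkConf sparkConf) {
		if (instance == null) {
			instance = SparkSession.builder().config(sparkConf).getOrCreate();
		}
		return instance;
	}
}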

Example 10: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) throws IOException {
  Flags.setFromCommandLineArgs(THE_OPTIONS, args);

  // Startup the Spark Conf.
  SparkConf conf = new SparkConf()
      .setAppName("A Databricks Reference Application: Logs Analysis with Spark");
  JavaStreamingContext jssc = new JavaStreamingContext(conf,
      Flags.getInstance().getSlideInterval());

  // Checkpointing must be enabled to use the updateStateByKey function & windowed operations.
  jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());

  // This method monitors a directory for new files to read in for streaming.
  JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());

  JavaDStream<ApacheAccessLog> accessLogsDStream
    = logData.map(new Functions.ParseFromLogLine()).cache();

  final LogAnalyzerTotal logAnalyzerTotal = new LogAnalyzerTotal();
  final LogAnalyzerWindowed logAnalyzerWindowed = new LogAnalyzerWindowed();

  // Process the DStream which gathers stats for all of time.
  logAnalyzerTotal.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);

  // Calculate statistics for the last time interval.
  logAnalyzerWindowed.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);

  // Render the output each time there is a new RDD in the accessLogsDStream.
  final Renderer renderer = new Renderer();
  accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
      public Void call(JavaRDD<ApacheAccessLog> rdd) {
        // Call this to output the stats.
        try {
          renderer.render(logAnalyzerTotal.getLogStatistics(),
                          logAnalyzerWindowed.getLogStatistics());
        } catch (Exception e) {
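          // Rendering failures are ignored so one bad batch does not stop the stream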
        }
        return null;
      }
    });

  // Start the streaming server.
  jssc.start();              // Start the computation
  jssc.awaitTermination();   // Wait for the computation to terminate
}
 
Developer: holdenk, Project: learning-spark-examples, Lines: 46, Source: LogAnalyzerAppMain.java
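Example 10 is written against the Spark 1.x API, where foreachRDD takes a Function<JavaRDD<T>, Void> and the callback must return null. On Spark 2.x the same hook takes a VoidFunction, so the rendering step would look like this instead (a sketch of the 2.x form):

accessLogsDStream.foreachRDD((VoidFunction<JavaRDD<ApacheAccessLog>>) rdd -> {
    try {
        renderer.render(logAnalyzerTotal.getLogStatistics(),
                        logAnalyzerWindowed.getLogStatistics());
    } catch (Exception e) {
        // Ignore rendering failures, as the original does
    }
});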

Example 11: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import required by this method
public static void main(String[] args) throws Exception {

        SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

        JavaDStream<String> stringJavaDStream = ssc.textFileStream("src/main/resources/stream");
        stringJavaDStream.print();

        ssc.start();
        ssc.awaitTermination();

    }
 
Developer: knoldus, Project: Sparkathon, Lines: 13, Source: Basestream.java


Note: the org.apache.spark.streaming.api.java.JavaStreamingContext.textFileStream method examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The code fragments are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors. Consult each project's license before redistributing or using the code; do not repost without permission.