This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaStreamingContext.textFileStream. If you are unsure what JavaStreamingContext.textFileStream does or how to call it, the curated code examples below should help; you can also explore the enclosing class, org.apache.spark.streaming.api.java.JavaStreamingContext, further.
Eleven code examples of JavaStreamingContext.textFileStream are shown below, sorted by popularity by default.
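Before the examples, here is a minimal self-contained sketch of the typical textFileStream pattern (the application name, directory path, and batch interval are placeholders, not taken from any of the examples below):

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class TextFileStreamSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("TextFileStreamSketch").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

        // textFileStream watches the directory and reads each new text file as a stream of lines
        JavaDStream<String> lines = jssc.textFileStream("/tmp/stream-input");   // placeholder directory
        lines.print();

        jssc.start();
        jssc.awaitTermination();
    }
}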
Example 1: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) {
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

    // Watch the directory and treat every new text file as a stream of lines
    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.print();

    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
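Example 1 references three helper constants that are not shown in the snippet. A minimal sketch of what they might look like under the Spark 2.x Java API (the names are kept, the bodies are assumptions, and imports of org.apache.spark.api.java.function.*, scala.Tuple2, and java.util.Arrays are assumed):

// Hypothetical definitions of the helpers used above (not part of the original snippet)
private static final FlatMapFunction<String, String> WORDS_EXTRACTOR =
        line -> Arrays.asList(line.split(" ")).iterator();   // Spark 2.x flatMap expects an Iterator
private static final PairFunction<String, String, Integer> WORDS_MAPPER =
        word -> new Tuple2<>(word, 1);
private static final Function2<Integer, Integer, Integer> WORDS_REDUCER =
        (a, b) -> a + b;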
Example 2: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

private void start() {
    // Create a local StreamingContext with two working threads and a batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
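StreamingUtils.getInputDirectory() is a helper from the surrounding project (net.jgp.labs.spark) and is not shown here; a minimal stand-in, assuming it does nothing more than resolve the directory to watch, might look like this:

// Hypothetical stand-in for the project's StreamingUtils helper
public final class StreamingUtils {
    private StreamingUtils() {}

    public static String getInputDirectory() {
        return "/tmp/streaming-input";   // placeholder path
    }
}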
Example 3: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

private void start() {
    // Create a local StreamingContext with two working threads and a batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Create JavaRDD<Row>
    msgDataStream.foreachRDD(new RowProcessor());

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

Author: jgperrin | Project: net.jgp.labs.spark | Lines: 21 | Source: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java
Example 4: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) {
    // Windows-specific property, needed if Hadoop is not installed or HADOOP_HOME is not set
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
    String inputDirectory = "E:\\hadoop\\streamFolder\\";
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
    // streamingContext.checkpoint("E:\\hadoop\\checkpoint");

    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.WARN);

    JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
    streamfile.print();
    streamfile.foreachRDD(rdd -> rdd.foreach(x -> System.out.println(x)));

    JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
    streamedFile.print();

    streamingContext.start();
    try {
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
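A small follow-up sketch (not in the original source): the pair stream returned by fileStream carries byte offsets as keys, so, assuming the usual Hadoop Text values shown above, it can be mapped back to plain strings and processed the same way as the textFileStream output:

// Sketch: convert the (offset, line) pairs from fileStream into plain lines
JavaDStream<String> plainLines = streamedFile.map(pair -> pair._2().toString());
plainLines.print();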
Example 5: getJavaPairDStream

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static JavaPairDStream<String, String> getJavaPairDStream(final File tempDir, final JavaStreamingContext ssc, final String subject1) {
    final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath());
    JavaPairDStream<String, String> keyValues = lines.mapToPair((PairFunction<String, String, String>) str -> {
        return new Tuple2<String, String>(subject1 + "." + str, str);
    });
    return keyValues;
}
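A hypothetical caller of this helper, assuming the method is in scope and the code sits inside a main that declares throws InterruptedException (the directory, subject, and batch interval are placeholders):

// Sketch: key every incoming line with a subject prefix and print the pairs
JavaStreamingContext ssc = new JavaStreamingContext(
        new SparkConf().setAppName("PairStream").setMaster("local[2]"), Durations.seconds(2));
JavaPairDStream<String, String> keyed =
        getJavaPairDStream(new File("/tmp/pair-input"), ssc, "subjectA");
keyed.print();   // each line becomes ("subjectA.<line>", "<line>")
ssc.start();
ssc.awaitTermination();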
Example 6: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) {
    // Create the context with a 10 second batch interval
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("Streaming102");
    //SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("Streaming102");
    sparkConf.setJars(new String[] { "target\\original-TestProjects-1.0-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));

    String folder = "./stream/";
    if (args.length == 1) {
        folder = args[0];
    }

    JavaDStream<String> lines = ssc.textFileStream(folder);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(String x) {
            System.out.println(x);
            return Lists.newArrayList(SPACE.split(x));
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<String, Integer>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });
    wordCounts.print();

    ssc.start();
    ssc.awaitTermination();
}
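The anonymous classes above follow the Spark 1.x Java API, where flatMap returns an Iterable. Under Spark 2.x, where flatMap expects an Iterator, the same word count can be written more compactly with lambdas; a sketch, reusing the lines stream and SPACE pattern from the example and assuming java.util.Arrays and scala.Tuple2 are imported:

JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator());
JavaPairDStream<String, Integer> wordCounts = words
        .mapToPair(s -> new Tuple2<>(s, 1))
        .reduceByKey((i1, i2) -> i1 + i2);
wordCounts.print();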
Example 7: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) throws Exception {
    Logger.getLogger("org").setLevel(Level.WARN);
    Logger.getLogger("akka").setLevel(Level.WARN);
    final Pattern SPACE = Pattern.compile(" ");

    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
    lines.print();

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    words.foreachRDD(
        new VoidFunction2<JavaRDD<String>, Time>() {
            @Override
            public void call(JavaRDD<String> rdd, Time time) {
                // Get the singleton instance of SQLContext
                SQLContext sqlContext = SQLContext.getOrCreate(rdd.context());

                // Convert RDD[String] to RDD[JavaRecord], then to a Dataset
                JavaRDD<JavaRecord> rowRDD = rdd.map(new Function<String, JavaRecord>() {
                    public JavaRecord call(String word) {
                        JavaRecord record = new JavaRecord();
                        record.setWord(word);
                        return record;
                    }
                });
                Dataset<Row> wordsDataset = sqlContext.createDataFrame(rowRDD, JavaRecord.class);

                // Register as a table
                wordsDataset.registerTempTable("words");

                // Do the word count on the table using SQL and print it
                Dataset<Row> wordCountsDataset =
                    sqlContext.sql("select word, count(*) as total from words group by word");
                wordCountsDataset.show();
            }
        }
    );

    ssc.start();
    ssc.awaitTermination();
}
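registerTempTable and SQLContext.getOrCreate are the older entry points. On Spark 2.x the same per-batch step is usually expressed through SparkSession; a sketch of the equivalent body (not part of the original source), reusing rowRDD and JavaRecord from the example:

SparkSession spark = SparkSession.builder().config(rdd.context().getConf()).getOrCreate();
Dataset<Row> wordsDataset = spark.createDataFrame(rowRDD, JavaRecord.class);
wordsDataset.createOrReplaceTempView("words");
Dataset<Row> wordCounts =
        spark.sql("select word, count(*) as total from words group by word");
wordCounts.show();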
Example 8: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) throws Exception {
    final Pattern SPACE = Pattern.compile(" ");

    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
    lines.print();

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Lists.newArrayList(SPACE.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(
        new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<String, Integer>(s, 1);
            }
        });
    wordsDstream.print();

    Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    };

    // Count words over a 30 second window, sliding every 10 seconds
    JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(reduceFunc, Durations.seconds(30), Durations.seconds(10));
    windowedWordCounts.print();

    ssc.start();
    ssc.awaitTermination();
}
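This form of reduceByKeyAndWindow recomputes the whole 30-second window on every 10-second slide. Spark also offers an incremental variant that additionally takes an inverse reduce function and requires a checkpoint directory; a sketch, reusing wordsDstream and ssc from the example (the checkpoint path is a placeholder):

ssc.checkpoint("/tmp/spark-checkpoint");   // required by the incremental variant
JavaPairDStream<String, Integer> windowedCounts = wordsDstream.reduceByKeyAndWindow(
        (a, b) -> a + b,   // add counts of batches entering the window
        (a, b) -> a - b,   // subtract counts of batches leaving the window
        Durations.seconds(30), Durations.seconds(10));
windowedCounts.print();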
Example 9: start

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

private void start() {
    // Create a local StreamingContext with two working threads and a batch interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Create JavaRDD<Row>
    msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
        private static final long serialVersionUID = -590010339928376829L;

        @Override
        public void call(JavaRDD<String> rdd) {
            JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
                private static final long serialVersionUID = 5167089361335095997L;

                @Override
                public Row call(String msg) {
                    Row row = RowFactory.create(msg);
                    return row;
                }
            });

            // Create the schema
            StructType schema = DataTypes.createStructType(
                new StructField[] { DataTypes.createStructField("Message", DataTypes.StringType, true) });

            // Get the Spark 2.0 session
            SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context().getConf());
            Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
            msgDataFrame.show();
        }
    });

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

Author: jgperrin | Project: net.jgp.labs.spark | Lines: 44 | Source: StreamingIngestionFileSystemTextFileToDataframeApp.java
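JavaSparkSessionSingleton is a small helper from the surrounding project that lazily creates one SparkSession per JVM. A minimal sketch of that pattern, modeled on the official Spark streaming examples (the original implementation may differ):

// Sketch of a lazily initialized SparkSession holder
class JavaSparkSessionSingleton {
    private static transient SparkSession instance = null;

    static SparkSession getInstance(SparkConf sparkConf) {
        if (instance == null) {
            instance = SparkSession.builder().config(sparkConf).getOrCreate();
        }
        return instance;
    }
}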
Example 10: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) throws IOException {
    Flags.setFromCommandLineArgs(THE_OPTIONS, args);

    // Set up the Spark configuration.
    SparkConf conf = new SparkConf()
        .setAppName("A Databricks Reference Application: Logs Analysis with Spark");
    JavaStreamingContext jssc = new JavaStreamingContext(conf,
        Flags.getInstance().getSlideInterval());

    // Checkpointing must be enabled to use the updateStateByKey function and windowed operations.
    jssc.checkpoint(Flags.getInstance().getCheckpointDirectory());

    // This method monitors a directory for new files to read in for streaming.
    JavaDStream<String> logData = jssc.textFileStream(Flags.getInstance().getLogsDirectory());
    JavaDStream<ApacheAccessLog> accessLogsDStream
        = logData.map(new Functions.ParseFromLogLine()).cache();

    final LogAnalyzerTotal logAnalyzerTotal = new LogAnalyzerTotal();
    final LogAnalyzerWindowed logAnalyzerWindowed = new LogAnalyzerWindowed();

    // Process the DStream which gathers stats for all of time.
    logAnalyzerTotal.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);
    // Calculate statistics for the last time interval.
    logAnalyzerWindowed.processAccessLogs(Flags.getInstance().getOutputDirectory(), accessLogsDStream);

    // Render the output each time there is a new RDD in the accessLogsDStream.
    final Renderer renderer = new Renderer();
    accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
        public Void call(JavaRDD<ApacheAccessLog> rdd) {
            // Call this to output the stats.
            try {
                renderer.render(logAnalyzerTotal.getLogStatistics(),
                    logAnalyzerWindowed.getLogStatistics());
            } catch (Exception e) {
            }
            return null;
        }
    });

    // Start the streaming server.
    jssc.start();              // Start the computation
    jssc.awaitTermination();   // Wait for the computation to terminate
}
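The Function&lt;JavaRDD&lt;ApacheAccessLog&gt;, Void&gt; overload of foreachRDD comes from the Spark 1.x API. On Spark 2.x the callback is a VoidFunction, which removes the dummy return value; a sketch of the equivalent call, reusing the renderer and analyzers from the example:

accessLogsDStream.foreachRDD(rdd -> {
    // Output the accumulated and windowed statistics for this batch
    renderer.render(logAnalyzerTotal.getLogStatistics(),
            logAnalyzerWindowed.getLogStatistics());
});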
Example 11: main

import org.apache.spark.streaming.api.java.JavaStreamingContext; // import the package/class the method depends on

public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> stringJavaDStream = ssc.textFileStream("src/main/resources/stream");
    stringJavaDStream.print();

    ssc.start();
    ssc.awaitTermination();
}
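One practical note that applies to all of the examples above: textFileStream only picks up files that appear in the watched directory after the stream has started, and it does not re-read files that are modified in place, so finished files should be moved or renamed into the directory atomically. A small sketch of feeding the directory this way (paths are placeholders; java.nio.file imports assumed):

// Write the file elsewhere first, then move it into the watched directory atomically
Path staged = Paths.get("/tmp/staging/part-0001.txt");
Path target = Paths.get("src/main/resources/stream/part-0001.txt");
Files.move(staged, target, StandardCopyOption.ATOMIC_MOVE);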