This article collects typical usage examples of the JavaDStream.filter method from the Java class org.apache.spark.streaming.api.java.JavaDStream. If you are wondering what JavaDStream.filter does, how to use it, or what it looks like in practice, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.streaming.api.java.JavaDStream.
The following presents 4 code examples of JavaDStream.filter, ordered by popularity by default.
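Before the collected examples, here is a minimal, self-contained sketch of JavaDStream.filter using a Java 8 lambda; the local master, application name, and port 9999 are illustrative assumptions, not taken from any of the examples below:

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class FilterSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("FilterSketch");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));

    JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
    // filter keeps only the elements for which the predicate returns true
    JavaDStream<String> errors = lines.filter(line -> line.contains("error"));

    errors.print();
    jssc.start();
    jssc.awaitTermination();
  }
}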
Example 1: main
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
  String master = args[0];
  JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
  // Create a StreamingContext with a 1 second batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
  // Create a DStream from all the input on port 7777
  JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
  // Filter our DStream for lines with "error"
  JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
    public Boolean call(String line) {
      return line.contains("error");
    }
  });
  // Print out the lines with errors, which causes this DStream to be evaluated
  errorLines.print();
  // Start our streaming context and wait for it to "finish"
  jssc.start();
  // Wait for 10 seconds, then exit. To run forever, call without a timeout.
  jssc.awaitTermination(10000);
  // Stop the streaming context
  jssc.stop();
}
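On newer Spark versions (1.3+) with Java 8, the same filter can be written as a lambda, and the timed wait is expressed with awaitTerminationOrTimeout, which superseded the timed overload of awaitTermination. A sketch against the variables above:

JavaDStream<String> errorLines = lines.filter(line -> line.contains("error"));
jssc.start();
jssc.awaitTerminationOrTimeout(10000); // returns false if the timeout elapsed first
jssc.stop();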
Example 2: processPOIData
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Returns the vehicles that are within the radius of a point of interest (POI), together with their distance from the POI.
 *
 * @param nonFilteredIotDataStream the original IoT data stream
 * @param broadcastPOIValues broadcast variable containing the POI coordinates and the route and vehicle type to monitor
 */
public void processPOIData(JavaDStream<IoTData> nonFilteredIotDataStream, Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues) {
  // Filter by routeId, vehicleType, and POI range
  JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream
      .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2())
          && iot.getVehicleType().contains(broadcastPOIValues.value()._3())
          && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()),
              Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(),
              broadcastPOIValues.value()._1().getLongitude(),
              broadcastPOIValues.value()._1().getRadius())));

  // Pair each filtered record with the POI
  JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered
      .mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));

  // Transform to a DStream of POITrafficData
  JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc);

  // Map Cassandra table columns
  Map<String, String> columnNameMappings = new HashMap<String, String>();
  columnNameMappings.put("vehicleId", "vehicleid");
  columnNameMappings.put("distance", "distance");
  columnNameMappings.put("vehicleType", "vehicletype");
  columnNameMappings.put("timeStamp", "timestamp");

  // Call the CassandraStreamingJavaUtil function to save to the DB
  javaFunctions(trafficDStream)
      .writerBuilder("traffickeyspace", "poi_traffic",
          CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings))
      .withConstantTTL(120) // keep data for 2 minutes
      .saveToCassandra();
}
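The geo check in the filter above is delegated to GeoDistanceCalculator.isInPOIRadius, a project-specific helper whose source is not shown on this page. Below is a haversine-based sketch of what such a helper typically computes; this is an assumption for illustration, and the actual project class may differ:

// Hypothetical sketch of a haversine-based POI radius check (not the
// project's actual GeoDistanceCalculator implementation).
public class GeoDistanceCalculator {
  private static final double EARTH_RADIUS_KM = 6371.0;

  // Great-circle distance between two lat/lon points, in kilometers
  public static double getDistance(double lat1, double lon1, double lat2, double lon2) {
    double dLat = Math.toRadians(lat2 - lat1);
    double dLon = Math.toRadians(lon2 - lon1);
    double a = Math.sin(dLat / 2) * Math.sin(dLat / 2)
        + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
        * Math.sin(dLon / 2) * Math.sin(dLon / 2);
    return EARTH_RADIUS_KM * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
  }

  // True when the point (lat1, lon1) lies within radiusKm of the POI (lat2, lon2)
  public static boolean isInPOIRadius(double lat1, double lon1, double lat2, double lon2, double radiusKm) {
    return getDistance(lat1, lon1, lat2, lon2) <= radiusKm;
  }
}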
Example 3: readData
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Method to read in the data from an allowed input format, filter, and return a JavaDStream of MapWritable data elements
 */
@SuppressWarnings("unchecked")
public JavaDStream<MapWritable> readData() throws IOException, PIRException
{
  logger.info("Reading data ");

  Job job = Job.getInstance();
  String baseQuery = SystemConfiguration.getProperty("pir.baseQuery");
  String jobName = "pirSpark_base_" + baseQuery + "_" + System.currentTimeMillis();
  job.setJobName(jobName);
  job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
  job.getConfiguration().set("query", baseQuery);
  job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName());
  job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas"));

  // Set the inputFormatClass based upon the baseInputFormat property
  String classString = SystemConfiguration.getProperty("pir.baseInputFormat");
  Class<? extends BaseInputFormat<Text, MapWritable>> inputClass;
  try
  {
    inputClass = (Class<? extends BaseInputFormat<Text, MapWritable>>) Class.forName(classString);
  } catch (ClassNotFoundException | ClassCastException e)
  {
    throw new PIRException(classString + " cannot be instantiated or does not extend BaseInputFormat", e);
  }
  job.setInputFormatClass(inputClass);

  FileInputFormat.setInputPaths(job, inputData);

  // Read data from HDFS
  logger.info("useQueueStream = " + useQueueStream);
  JavaDStream<MapWritable> mwStream;
  if (useQueueStream)
  {
    Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>();
    JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), inputClass, Text.class, MapWritable.class).values()
        .coalesce(numDataPartitions);
    rddQueue.add(rddIn);
    mwStream = jssc.queueStream(rddQueue);
  }
  else
  {
    JavaPairInputDStream<Text, MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, inputClass);
    mwStream = inputRDD.transform(new Function<JavaPairRDD<Text, MapWritable>, JavaRDD<MapWritable>>()
    {
      private static final long serialVersionUID = 1L;

      @Override
      public JavaRDD<MapWritable> call(JavaPairRDD<Text, MapWritable> pair) throws Exception
      {
        return pair.values();
      }
    }).repartition(numDataPartitions);
  }

  // Filter out by the provided stopListFile entries
  if (qSchema.getFilter() != null)
  {
    return mwStream.filter(new FilterData(accum, bVars));
  }
  return mwStream;
}
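FilterData is also project-specific and not shown here; the essential shape required by JavaDStream.filter is a serializable org.apache.spark.api.java.function.Function<MapWritable, Boolean> that returns false for records to drop. A simplified, hypothetical stand-in follows; the "domain" field name and stop-list semantics are illustrative assumptions, not the real FilterData logic:

import java.util.Set;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.function.Function;

// Hypothetical simplified stand-in for FilterData: drop any record whose
// "domain" field appears in a stop list.
public class StopListFilter implements Function<MapWritable, Boolean>
{
  private static final long serialVersionUID = 1L;
  private final Set<String> stopList;

  public StopListFilter(Set<String> stopList)
  {
    this.stopList = stopList;
  }

  @Override
  public Boolean call(MapWritable record)
  {
    Text value = (Text) record.get(new Text("domain"));
    // Keep the record unless its domain is on the stop list
    return value == null || !stopList.contains(value.toString());
  }
}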
Example 4: readDataES
import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class the method depends on
/**
 * Method to read in the data from Elasticsearch, filter, and return a JavaDStream of MapWritable data elements
 */
@SuppressWarnings("unchecked")
public JavaDStream<MapWritable> readDataES() throws IOException
{
  logger.info("Reading data ");

  Job job = Job.getInstance();
  String jobName = "pirSpark_ES_" + esQuery + "_" + System.currentTimeMillis();
  job.setJobName(jobName);
  job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes"));
  job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port"));
  job.getConfiguration().set("es.resource", esResource);
  job.getConfiguration().set("es.query", esQuery);

  // Read data from Elasticsearch
  JavaDStream<MapWritable> mwStream;
  if (useQueueStream)
  {
    Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>();
    JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), EsInputFormat.class, Text.class, MapWritable.class).values()
        .coalesce(numDataPartitions);
    rddQueue.add(rddIn);
    mwStream = jssc.queueStream(rddQueue);
  }
  else
  {
    JavaPairInputDStream<Text, MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, EsInputFormat.class);
    mwStream = inputRDD.transform(new Function<JavaPairRDD<Text, MapWritable>, JavaRDD<MapWritable>>()
    {
      private static final long serialVersionUID = 1L;

      @Override
      public JavaRDD<MapWritable> call(JavaPairRDD<Text, MapWritable> pair) throws Exception
      {
        return pair.values();
      }
    }).repartition(numDataPartitions);
  }

  // Filter out by the provided stopListFile entries
  if (qSchema.getFilter() != null)
  {
    return mwStream.filter(new FilterData(accum, bVars));
  }
  else
  {
    return mwStream;
  }
}
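Both readData and readDataES fall back to jssc.queueStream when useQueueStream is set; the same pattern is handy for exercising a DStream filter without a live source. A minimal sketch, assuming jssc is an existing JavaStreamingContext and with illustrative variable names and data:

import java.util.Arrays;
import java.util.LinkedList;
import java.util.Queue;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.streaming.api.java.JavaDStream;

// Build a one-batch stream from an in-memory RDD and filter it
Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
rddQueue.add(jssc.sparkContext().parallelize(Arrays.asList("ok", "error: boom")));
JavaDStream<String> stream = jssc.queueStream(rddQueue);
stream.filter(line -> line.startsWith("error")).print(); // prints only "error: boom"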