

Java JavaDStream.filter Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.streaming.api.java.JavaDStream.filter. If you are wondering what JavaDStream.filter does, how to use it, or where to find concrete examples, the curated code samples below may help. You can also explore further usage examples of org.apache.spark.streaming.api.java.JavaDStream, the class this method belongs to.


The following presents 4 code examples of the JavaDStream.filter method, sorted by popularity by default.
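Before the individual examples, here is a minimal, self-contained sketch of the typical usage pattern. The host, port, application name, and the "WARN" token are placeholders chosen for illustration, not taken from the examples below; filter simply returns a new DStream containing only the elements for which the predicate returns true.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class FilterSketch {
    public static void main(String[] args) throws InterruptedException {
        // local[2]: one thread for the socket receiver, one for processing
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("FilterSketch");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(1));

        // Placeholder source; filter works the same on any DStream
        JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);

        // Keep only the lines for which the predicate returns true
        JavaDStream<String> warnings = lines.filter(line -> line.contains("WARN"));

        warnings.print();
        jssc.start();
        jssc.awaitTermination();
    }
}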

Example 1: main

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class this method depends on
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public static void main(String[] args) throws Exception {
    String master = args[0];
    JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
    // Create a StreamingContext with a 1-second batch size
    JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
    // Create a DStream from all the input on port 7777
    JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
    // Filter our DStream for lines containing "error"
    JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
        public Boolean call(String line) {
            return line.contains("error");
        }
    });
    // Print the error lines, which forces this DStream to be evaluated
    errorLines.print();
    // Start our streaming context and wait for it to "finish"
    jssc.start();
    // Wait for 10 seconds, then exit; to run forever, call awaitTermination() with no timeout
    jssc.awaitTermination(10000);
    // Stop the streaming context
    jssc.stop();
}
 
Developer ID: holdenk, Project: learning-spark-examples, Lines: 22, Source: StreamingLogInput.java
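A side note on Example 1: in Spark 1.0 and later, org.apache.spark.api.java.function.Function is a functional interface, so on Java 8+ the anonymous class above can be collapsed into a lambda with identical behavior (a sketch, not part of the original project):

// Equivalent to the anonymous Function in Example 1, assuming Java 8+ and Spark 1.x+
JavaDStream<String> errorLines = lines.filter(line -> line.contains("error"));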

Example 2: processPOIData

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class this method depends on
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.broadcast.Broadcast;
import com.datastax.spark.connector.japi.CassandraJavaUtil;
import static com.datastax.spark.connector.japi.CassandraStreamingJavaUtil.javaFunctions;
import scala.Tuple2;
import scala.Tuple3;
import java.util.HashMap;
import java.util.Map;
// IoTData, POIData, POITrafficData, and GeoDistanceCalculator are classes of the iot-traffic-monitor project
/**
 * Gets the vehicles that are within the radius of a POI, along with their distance from the POI.
 *
 * @param nonFilteredIotDataStream original IoT data stream
 * @param broadcastPOIValues       broadcast variable carrying the POI coordinates, route, and vehicle type to monitor
 */
public void processPOIData(JavaDStream<IoTData> nonFilteredIotDataStream, Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues) {

    // Keep only records matching the broadcast routeId and vehicleType that fall within the POI radius
    JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream
            .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2())
                    && iot.getVehicleType().contains(broadcastPOIValues.value()._3())
                    && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()),
                            Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(),
                            broadcastPOIValues.value()._1().getLongitude(),
                            broadcastPOIValues.value()._1().getRadius())));

    // Pair each filtered record with the POI
    JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered
            .mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()));

    // Transform to a DStream of POITrafficData
    JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc);

    // Map POJO field names to Cassandra table columns
    Map<String, String> columnNameMappings = new HashMap<>();
    columnNameMappings.put("vehicleId", "vehicleid");
    columnNameMappings.put("distance", "distance");
    columnNameMappings.put("vehicleType", "vehicletype");
    columnNameMappings.put("timeStamp", "timestamp");

    // Use CassandraStreamingJavaUtil to save the stream to the database
    javaFunctions(trafficDStream)
            .writerBuilder("traffickeyspace", "poi_traffic", CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings))
            .withConstantTTL(120) // keep data for 2 minutes
            .saveToCassandra();
}
 
Developer ID: baghelamit, Project: iot-traffic-monitor, Lines: 38, Source: IoTTrafficDataProcessor.java
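GeoDistanceCalculator is a helper class from the iot-traffic-monitor project whose source is not shown on this page. To make the filter predicate easier to follow, below is a hypothetical sketch of what isInPOIRadius could look like using the haversine formula; only the method name and parameter order are inferred from the call site above, while the body and the kilometer units are assumptions:

// Hypothetical sketch; not the project's actual implementation
public class GeoDistanceCalculator {

    private static final double EARTH_RADIUS_KM = 6371.0;

    // Great-circle distance between two latitude/longitude points, in kilometers (haversine formula)
    public static double getDistance(double lat1, double lon1, double lat2, double lon2) {
        double dLat = Math.toRadians(lat2 - lat1);
        double dLon = Math.toRadians(lon2 - lon1);
        double a = Math.sin(dLat / 2) * Math.sin(dLat / 2)
                + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
                  * Math.sin(dLon / 2) * Math.sin(dLon / 2);
        return EARTH_RADIUS_KM * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
    }

    // True when the vehicle is within `radius` kilometers of the POI center
    public static boolean isInPOIRadius(double vehicleLat, double vehicleLon,
            double poiLat, double poiLon, double radius) {
        return getDistance(vehicleLat, vehicleLon, poiLat, poiLon) <= radius;
    }
}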

Example 3: readData

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class this method depends on
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

/**
 * Method to read in the data from an allowed input format, filter, and return a DStream of MapWritable data elements
 */
@SuppressWarnings("unchecked")
public JavaDStream<MapWritable> readData() throws IOException, PIRException
{
  logger.info("Reading data");

  Job job = Job.getInstance();
  String baseQuery = SystemConfiguration.getProperty("pir.baseQuery");
  String jobName = "pirSpark_base_" + baseQuery + "_" + System.currentTimeMillis();
  job.setJobName(jobName);
  job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
  job.getConfiguration().set("query", baseQuery);

  job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName());
  job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas"));

  // Set the inputFormatClass based upon the baseInputFormat property
  String classString = SystemConfiguration.getProperty("pir.baseInputFormat");
  Class<? extends BaseInputFormat<Text,MapWritable>> inputClass;
  try
  {
    inputClass = (Class<? extends BaseInputFormat<Text,MapWritable>>) Class.forName(classString);
  } catch (ClassNotFoundException | ClassCastException e)
  {
    throw new PIRException(classString + " cannot be instantiated or does not extend BaseInputFormat", e);
  }
  job.setInputFormatClass(inputClass);

  FileInputFormat.setInputPaths(job, inputData);

  // Read data from HDFS
  logger.info("useQueueStream = " + useQueueStream);
  JavaDStream<MapWritable> mwStream;
  if (useQueueStream)
  {
    Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>();
    JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), inputClass, Text.class, MapWritable.class).values()
        .coalesce(numDataPartitions);

    rddQueue.add(rddIn);
    mwStream = jssc.queueStream(rddQueue);
  }
  else
  {
    JavaPairInputDStream<Text,MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, inputClass);
    mwStream = inputRDD.transform(new Function<JavaPairRDD<Text,MapWritable>,JavaRDD<MapWritable>>()
    {
      private static final long serialVersionUID = 1L;

      @Override
      public JavaRDD<MapWritable> call(JavaPairRDD<Text,MapWritable> pair) throws Exception
      {
        return pair.values();
      }
    }).repartition(numDataPartitions);
  }

  // Filter using the provided stopListFile entries
  if (qSchema.getFilter() != null)
  {
    return mwStream.filter(new FilterData(accum, bVars));
  }

  return mwStream;
}
 
Developer ID: apache, Project: incubator-pirk, Lines: 68, Source: ComputeStreamingResponse.java
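FilterData is an Apache Pirk class that is not reproduced on this page; all that JavaDStream.filter requires of it is that it be a serializable org.apache.spark.api.java.function.Function<MapWritable, Boolean>. The following simplified stand-in (a hypothetical class with an invented field-name check, not Pirk's actual filtering logic) shows the shape such a predicate must have:

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.function.Function;

// Hypothetical stand-in for FilterData: keeps only records that contain a given field
public class ContainsKeyFilter implements Function<MapWritable, Boolean>
{
  private static final long serialVersionUID = 1L;

  // Stored as a String so the closure stays Java-serializable (hadoop Text is not Serializable)
  private final String requiredKey;

  public ContainsKeyFilter(String requiredKey)
  {
    this.requiredKey = requiredKey;
  }

  @Override
  public Boolean call(MapWritable record)
  {
    return record.containsKey(new Text(requiredKey));
  }
}

It would be used the same way as FilterData above, e.g. mwStream.filter(new ContainsKeyFilter("someField")), where "someField" is a placeholder field name.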

Example 4: readDataES

import org.apache.spark.streaming.api.java.JavaDStream; // import the package/class this method depends on
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.elasticsearch.hadoop.mr.EsInputFormat;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Queue;

/**
 * Method to read in the data from Elasticsearch, filter, and return a DStream of MapWritable data elements
 */
@SuppressWarnings("unchecked")
public JavaDStream<MapWritable> readDataES() throws IOException
{
  logger.info("Reading data");

  Job job = Job.getInstance();
  String jobName = "pirSpark_ES_" + esQuery + "_" + System.currentTimeMillis();
  job.setJobName(jobName);
  job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes"));
  job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port"));
  job.getConfiguration().set("es.resource", esResource);
  job.getConfiguration().set("es.query", esQuery);

  // Read data from Elasticsearch
  JavaDStream<MapWritable> mwStream;
  if (useQueueStream)
  {
    Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>();
    JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), EsInputFormat.class, Text.class, MapWritable.class).values()
        .coalesce(numDataPartitions);
    rddQueue.add(rddIn);

    mwStream = jssc.queueStream(rddQueue);
  }
  else
  {
    JavaPairInputDStream<Text,MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, EsInputFormat.class);
    mwStream = inputRDD.transform(new Function<JavaPairRDD<Text,MapWritable>,JavaRDD<MapWritable>>()
    {
      private static final long serialVersionUID = 1L;

      @Override
      public JavaRDD<MapWritable> call(JavaPairRDD<Text,MapWritable> pair) throws Exception
      {
        return pair.values();
      }
    }).repartition(numDataPartitions);
  }

  // Filter using the provided stopListFile entries
  if (qSchema.getFilter() != null)
  {
    return mwStream.filter(new FilterData(accum, bVars));
  }
  else
  {
    return mwStream;
  }
}
 
Developer ID: apache, Project: incubator-pirk, Lines: 53, Source: ComputeStreamingResponse.java


Note: The org.apache.spark.streaming.api.java.JavaDStream.filter examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective authors, and copyright of the source code remains with the original authors; consult each project's license before distributing or using the code. Do not republish without permission.