当前位置: 首页>>代码示例>>Java>>正文


Java Function类代码示例

本文整理汇总了Java中org.apache.spark.api.java.function.Function的典型用法代码示例。如果您正苦于以下问题:Java Function类的具体用法?Java Function怎么用?Java Function使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Function类属于org.apache.spark.api.java.function包,在下文中一共展示了Function类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: createNGramDataFrame

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Builds a bigram data frame from a corpus of text lines.
 * @param lines an RDD of input text lines
 * @return a data frame with "words" (token array) and "ngrams" columns.
 */
DataFrame createNGramDataFrame(JavaRDD<String> lines) {
	// Tokenize each line on whitespace and wrap the token list in a Row.
	JavaRDD<Row> rows = lines.map(new Function<String, Row>(){
		private static final long serialVersionUID = -4332903997027358601L;

		@Override
		public Row call(String line) throws Exception {
			String[] tokens = line.split("\\s+");
			return RowFactory.create(Arrays.asList(tokens));
		}
	});
	StructField wordsField = new StructField("words",
			DataTypes.createArrayType(DataTypes.StringType), false,
			Metadata.empty());
	StructType schema = new StructType(new StructField[] { wordsField });
	DataFrame wordDF = new SQLContext(jsc).createDataFrame(rows, schema);
	// Build a bigram (n = 2) language model over the token column.
	NGram transformer = new NGram()
			.setInputCol("words")
			.setOutputCol("ngrams")
			.setN(2);
	DataFrame ngramDF = transformer.transform(wordDF);
	ngramDF.show(10, false);
	return ngramDF;
}
 
开发者ID:phuonglh,项目名称:vn.vitk,代码行数:27,代码来源:NGramBuilder.java

示例2: parse

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Parses a list of PoS-tagged sentences, each on a line, and writes the result
 * to an output file in a specified output format.
 * @param jsc the Spark context
 * @param sentences PoS-tagged input sentences, one per line
 * @param outputFileName path of the output file
 * @param outputFormat the output format (TEXT or JSON)
 */
public void parse(JavaSparkContext jsc, List<String> sentences, String outputFileName, OutputFormat outputFormat) {
	JavaRDD<String> input = jsc.parallelize(sentences);
	JavaRDD<Sentence> sents = input.map(new TaggedLineToSentenceFunction());
	JavaRDD<DependencyGraph> graphs = sents.map(new ParsingFunction());
	// Turn each dependency graph into a (sentence, dependency) row.
	JavaRDD<Row> rows = graphs.map(new Function<DependencyGraph, Row>() {
		private static final long serialVersionUID = -812004521983071103L;
		@Override
		public Row call(DependencyGraph graph) {
			String sentence = graph.getSentence().toString();
			return RowFactory.create(sentence, graph.dependencies());
		}
	});
	StructField sentenceField = new StructField("sentence", DataTypes.StringType, false, Metadata.empty());
	StructField dependencyField = new StructField("dependency", DataTypes.StringType, false, Metadata.empty());
	StructType schema = new StructType(new StructField[] { sentenceField, dependencyField });
	SQLContext sqlContext = new SQLContext(jsc);
	DataFrame df = sqlContext.createDataFrame(rows, schema);

	if (outputFormat == OutputFormat.TEXT) {
		df.select("dependency").write().text(outputFileName);
	} else {
		df.repartition(1).write().json(outputFileName);
	}
}
 
开发者ID:phuonglh,项目名称:vn.vitk,代码行数:31,代码来源:DependencyParser.java

示例3: main

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Sleep benchmark driver: creates one element per partition and sleeps the
 * requested number of seconds inside each map task, producing idle load.
 * Usage: JavaSleep &lt;seconds&gt;
 */
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
      System.err.println("Usage: JavaSleep <seconds>");
      System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaSleep");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    // Prefer the configured parallelism; fall back to the context default.
    int parallel = sparkConf.getInt("spark.default.parallelism", ctx.defaultParallelism());
    int seconds = Integer.parseInt(args[0]);

    // One element per partition, each carrying the sleep duration.
    Integer[] init_val = new Integer[parallel];
    Arrays.fill(init_val, seconds);

    JavaRDD<Integer> workload = ctx.parallelize(Arrays.asList(init_val), parallel).map(new Function<Integer, Integer>() {
      @Override
      public Integer call(Integer s) throws InterruptedException {
        // 1000L forces a long multiplication, avoiding int overflow for
        // very large second counts.
        Thread.sleep(s * 1000L);
        return 0;
      }
    });

    // collect() forces evaluation of the lazy map; the result is discarded.
    workload.collect();
    ctx.stop();
  }
 
开发者ID:thrill,项目名称:fst-bench,代码行数:27,代码来源:JavaSleep.java

示例4: main

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Entry point: reads Apache access-log lines from a Kafka direct stream,
 * parses them, and appends the records to a partitioned Parquet file.
 * @param args command-line flags consumed by {@code Flags}
 * @throws IOException propagated from flag/output handling
 */
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark configuration and the three contexts
	// (core, streaming, SQL) sharing one SparkContext.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize Kafka parameters: topic set and broker list from the flags.
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Obtain the data as a direct (receiver-less) Kafka stream.
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	// Keep only the message payload (the log line) of each (key, value) pair.
	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Parse each line into an access-log record.
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			// Malformed line: return the empty list so the record is dropped.
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// Convert the RDD to a DataFrame via the ApacheAccessLog bean schema.
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Append to a Parquet file partitioned by ipAddress/method/responseCode.
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming server.
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
开发者ID:sectong,项目名称:SparkToParquet,代码行数:52,代码来源:AppMain.java

示例5: predictForOutput_LogisticRegressionModel

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Computes a model prediction for each labeled point and pairs it with the
 * point's feature vector.
 * @param model a trained logistic regression model
 * @param data labeled points to predict on
 * @return an RDD of (features, prediction) tuples
 */
public static JavaRDD<Tuple2<Object, Object>> predictForOutput_LogisticRegressionModel(LogisticRegressionModel model, JavaRDD<LabeledPoint> data){
    Function<LabeledPoint, Tuple2<Object, Object>> predictFn =
      new Function<LabeledPoint, Tuple2<Object, Object>>() {
        private static final long serialVersionUID = 1L;
        @Override
        public Tuple2<Object, Object> call(LabeledPoint point) {
          Double prediction = model.predict(point.features());
          return new Tuple2<Object, Object>(point.features(), prediction);
        }
      };
    return data.map(predictFn);
}
 
开发者ID:Chih-Ling-Hsu,项目名称:Spark-Machine-Learning-Modules,代码行数:13,代码来源:PredictUnit.java

示例6: main

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * TeraSort benchmark driver: reads lines whose first 10 characters are the
 * sort key and the remainder the payload, sorts by key, and writes the
 * re-joined lines back out.
 * Usage: JavaTeraSort &lt;HDFS_INPUT&gt; &lt;HDFS_OUTPUT&gt;
 */
public static void main(String[] args) throws Exception {

    if (args.length < 2) {
      System.err.println("Usage: JavaTeraSort <HDFS_INPUT> <HDFS_OUTPUT>");
      System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaTeraSort");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);
    // Shuffle parallelism for the sort stage, taken from the HiBench config.
    // (The original also read spark.default.parallelism into an unused local;
    // that dead read has been removed.)
    int reducer = Integer.parseInt(IOCommon.getProperty("hibench.default.shuffle.parallelism").get());

    // Split each record into (key = first 10 chars, value = rest of line).
    JavaPairRDD<String, String> words = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) throws Exception {
            return new Tuple2<String, String>(s.substring(0, 10), s.substring(10));
        }
    });


    JavaPairRDD<String, String> sorted = words.sortByKey(true, reducer);

    // Re-join key and payload into a single output line.
    JavaRDD<String> result = sorted.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> e) throws Exception {
            return e._1() + e._2();
        }
    });

    result.saveAsTextFile(args[1]);

    ctx.stop();
  }
 
开发者ID:thrill,项目名称:fst-bench,代码行数:34,代码来源:JavaTeraSort.java

示例7: performJavaStream

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Repeatedly maps the four vector kernels (C = A; B = k*C; C = A + B;
 * A = B + k*C) over each element, recording per-run nanosecond timings.
 * @param appName name of the Spark application
 * @param input vector sets to process
 * @param noIters number of chained map iterations
 * @return the processed vector sets after {@code noIters} passes
 */
static List<StreamVectors> performJavaStream(String appName, List<StreamVectors> input, int noIters) {
    JavaRDD<StreamVectors> rdd = ExampleUtils.getSparkContext(appName).parallelize(input);
    for (int iter = 0; iter < noIters; iter++) {
        rdd = rdd.map(new Function<StreamVectors, StreamVectors>() {
            @Override
            public StreamVectors call(StreamVectors v) throws Exception {
                v.setStartRun(System.nanoTime());
                final int n = v.A.length;
                for (int i = 0; i < n; i++) {
                    v.C[i] = v.A[i];                               // copy
                }
                for (int i = 0; i < n; i++) {
                    v.B[i] = v.scaling_constant * v.C[i];          // scale
                }
                for (int i = 0; i < n; i++) {
                    v.C[i] = v.A[i] + v.B[i];                      // add
                }
                for (int i = 0; i < n; i++) {
                    v.A[i] = v.B[i] + v.scaling_constant * v.C[i]; // triad
                }
                v.setEndRun(System.nanoTime());
                return v;
            }
        });
    }
    return rdd.collect();
}
 
开发者ID:tudorv91,项目名称:SparkJNI,代码行数:27,代码来源:StreamUtils.java

示例8: performJavaStream

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Performs one pass of the four vector kernels (C = A; B = k*C; C = A + B;
 * A = B + k*C) over each element on Spark, recording per-run timings.
 * @param appName name of the Spark application
 * @param input vector sets to process
 * @return the processed vector sets
 */
private List<StreamVectors> performJavaStream(String appName, List<StreamVectors> input) {
    Function<StreamVectors, StreamVectors> kernels = new Function<StreamVectors, StreamVectors>() {
        @Override
        public StreamVectors call(StreamVectors v) throws Exception {
            v.setStartRun(System.nanoTime());
            final int n = v.A.length;
            for (int i = 0; i < n; i++) {
                v.C[i] = v.A[i];                               // copy
            }
            for (int i = 0; i < n; i++) {
                v.B[i] = v.scaling_constant * v.C[i];          // scale
            }
            for (int i = 0; i < n; i++) {
                v.C[i] = v.A[i] + v.B[i];                      // add
            }
            for (int i = 0; i < n; i++) {
                v.A[i] = v.B[i] + v.scaling_constant * v.C[i]; // triad
            }
            v.setEndRun(System.nanoTime());
            return v;
        }
    };
    return ExampleUtils.getSparkContext(appName).parallelize(input).map(kernels).collect();
}
 
开发者ID:tudorv91,项目名称:SparkJNI,代码行数:23,代码来源:StreamMain.java

示例9: call

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Processes one batch of (publisher, geo)-keyed aggregation logs: converts
 * each entry into an {@code AggregationResult} and saves every partition via
 * {@code SaveLogAggPartition}.
 * @param logsRDD the keyed aggregation logs for this batch; may be null
 * @throws Exception propagated from the Spark jobs or the save step
 */
@Override
public void call(JavaPairRDD<PublisherGeoKey, AggregationLog> logsRDD) throws Exception {

    if (logsRDD != null) {
        // NOTE(review): count() launches a full Spark job just to build this
        // log message; consider isEmpty() or removing it if this path is hot.
        LOG.info(" Data to process in RDD:" + logsRDD.count());

        JavaRDD<AggregationResult> aggResRDD = logsRDD.map(new Function<Tuple2<PublisherGeoKey, AggregationLog>, AggregationResult>() {
            @Override
            public AggregationResult call(
                    Tuple2<PublisherGeoKey, AggregationLog> arg0)
                    throws Exception {
                PublisherGeoKey p = arg0._1;
                AggregationLog a = arg0._2;
                // Builds (timestamp, publisher, geo, imps, unique estimate,
                // average bid). NOTE(review): getSumBids()/getImps() assumes
                // imps > 0 for every entry — confirm upstream guarantees this.
                return new AggregationResult(new Timestamp(a.getTimestamp()),
                        p.getPublisher(), p.getGeo(), a.getImps(),
                        (int) a.getUniquesHll().estimatedSize(),
                        a.getSumBids() / a.getImps());
            }
        });
        LOG.info(" Call Data Process Partition");
        // Persist each partition's results through the partition-level saver.
        aggResRDD.foreachPartition(new SaveLogAggPartition());
    } else
        LOG.error("Data to process:" + 0);
}
 
开发者ID:splicemachine,项目名称:splice-community-sample-code,代码行数:25,代码来源:SaveLogAggRDD.java

示例10: main

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Demonstrates passing class-based function objects to Spark transformations:
 * computes and prints the total number of characters in a compressed file.
 */
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Maps a line to its character count.
    class LineLength implements Function<String, Integer> {
        @Override
        public Integer call(String line) {
            return line.length();
        }
    }

    // Combines two partial counts.
    class Add implements Function2<Integer, Integer, Integer> {
        @Override
        public Integer call(Integer left, Integer right) {
            return left + right;
        }
    }

    JavaRDD<String> lines = sc.textFile("src/main/resources/compressed.gz");
    JavaRDD<Integer> lineLengths = lines.map(new LineLength());
    // Print each per-line length (side effect only).
    lineLengths.foreach(x -> System.out.println(x));

    int totalLength = lineLengths.reduce(new Add());

    System.out.println(totalLength);
}
 
开发者ID:knoldus,项目名称:Sparkathon,代码行数:26,代码来源:PassingFunctions.java

示例11: compile

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Create an appropriate {@link Function}-based predicate for deploying the given {@link PredicateDescriptor}
 * on Apache Spark.
 *
 * @param predicateDescriptor describes the function
 * @param operator            that executes the {@link Function}; only required if the {@code descriptor} describes an {@link ExtendedFunction}
 * @param operatorContext     contains optimization information for the {@code operator}
 * @param inputs              that feed the {@code operator}; only required if the {@code descriptor} describes an {@link ExtendedFunction}
 * @return a Spark-serializable predicate adapter around the Java implementation
 */
public <Type> Function<Type, Boolean> compile(
        PredicateDescriptor<Type> predicateDescriptor,
        SparkExecutionOperator operator,
        OptimizationContext.OperatorContext operatorContext,
        ChannelInstance[] inputs) {
    final Predicate<Type> javaImplementation = predicateDescriptor.getJavaImplementation();
    if (javaImplementation instanceof PredicateDescriptor.ExtendedSerializablePredicate) {
        // Extended predicates additionally receive an execution context.
        // NOTE(review): "Adapater" is the adapter class's actual (misspelled)
        // name; fixing it requires renaming at its declaration site.
        return new ExtendedPredicateAdapater<>(
                (PredicateDescriptor.ExtendedSerializablePredicate<Type>) javaImplementation,
                new SparkExecutionContext(operator, inputs, operatorContext.getOptimizationContext().getIterationNumber())
        );
    } else {
        // Plain predicates only need a serialization adapter.
        return new PredicateAdapter<>(javaImplementation);
    }
}
 
开发者ID:daqcri,项目名称:rheem,代码行数:25,代码来源:FunctionCompiler.java

示例12: toTaggedSentence

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Converts tagging output rows back into "token/tag" sentence strings.
 * Column 0 of each row holds the whitespace-separated tokens and column 1
 * the corresponding tags; mismatched lengths yield a null element.
 * @param output the tagging result data frame
 * @return an RDD of tagged sentences
 */
private JavaRDD<String> toTaggedSentence(DataFrame output) {
	return output.javaRDD().map(new Function<Row, String>() {
		private static final long serialVersionUID = 4208643510231783579L;
		@Override
		public String call(Row row) throws Exception {
			String[] tokens = row.getString(0).trim().split("\\s+");
			String[] tags = row.getString(1).trim().split("\\s+");
			if (tokens.length != tags.length) {
				System.err.println("Incompatible lengths!");
				return null;
			}
			StringBuilder buffer = new StringBuilder(64);
			for (int i = 0; i < tokens.length; i++) {
				buffer.append(tokens[i]).append('/').append(tags[i]).append(' ');
			}
			return buffer.toString().trim();
		}
	});
}
 
开发者ID:phuonglh,项目名称:vn.vitk,代码行数:23,代码来源:Tagger.java

示例13: numCharacters

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/**
 * Counts the number of non-space characters in this data set. This utility
 * method is used to check the tokenization result.
 * @param lines input lines
 * @return total number of characters left after removing whitespace and underscores
 */
int numCharacters(JavaRDD<String> lines) {
	// Strip whitespace runs and underscores, then measure what remains.
	JavaRDD<Integer> lengths = lines.map(new Function<String, Integer>() {
		private static final long serialVersionUID = -2189399343462982586L;
		@Override
		public Integer call(String line) throws Exception {
			return line.replaceAll("[\\s_]+", "").length();
		}
	});
	// Sum the per-line lengths into a single total.
	return lengths.reduce(new Function2<Integer, Integer, Integer>() {
		private static final long serialVersionUID = -8438072946884289401L;

		@Override
		public Integer call(Integer a, Integer b) throws Exception {
			return a + b;
		}
	});
}
 
开发者ID:phuonglh,项目名称:vn.vitk,代码行数:25,代码来源:Tokenizer.java

示例14: toPairFlatMapFunction

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/** Returns a flatmap function turning an iterator of {@link KV}s into an iterator of Spark pair tuples. */
public static <K, V> PairFlatMapFunction<Iterator<KV<K, V>>, K, V> toPairFlatMapFunction() {
  return new PairFlatMapFunction<Iterator<KV<K, V>>, K, V>() {
    @Override
    public Iterator<Tuple2<K, V>> call(final Iterator<KV<K, V>> input) {
      // Lazily view each KV as a (key, value) tuple.
      return Iterators.transform(
          input,
          new com.google.common.base.Function<KV<K, V>, Tuple2<K, V>>() {
            @Override
            public Tuple2<K, V> apply(KV<K, V> kv) {
              return new Tuple2<>(kv.getKey(), kv.getValue());
            }
          });
    }
  };
}
 
开发者ID:apache,项目名称:beam,代码行数:20,代码来源:TranslationUtils.java

示例15: fromPairFlatMapFunction

import org.apache.spark.api.java.function.Function; //导入依赖的package包/类
/** Returns a flatmap function turning an iterator of Spark pair tuples into an iterator of {@link KV}s. */
static <K, V> FlatMapFunction<Iterator<Tuple2<K, V>>, KV<K, V>> fromPairFlatMapFunction() {
  return new FlatMapFunction<Iterator<Tuple2<K, V>>, KV<K, V>>() {
    @Override
    public Iterator<KV<K, V>> call(Iterator<Tuple2<K, V>> input) {
      // Lazily view each (key, value) tuple as a KV.
      return Iterators.transform(
          input,
          new com.google.common.base.Function<Tuple2<K, V>, KV<K, V>>() {
            @Override
            public KV<K, V> apply(Tuple2<K, V> pair) {
              return KV.of(pair._1(), pair._2());
            }
          });
    }
  };
}
 
开发者ID:apache,项目名称:beam,代码行数:19,代码来源:TranslationUtils.java


注:本文中的org.apache.spark.api.java.function.Function类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。