

Java SparkConf.setAppName Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.SparkConf.setAppName. If you are wondering how exactly to use SparkConf.setAppName in Java, or looking for working examples of it, the curated code samples below should help. You can also browse further usage examples for the enclosing class, org.apache.spark.SparkConf.


The following presents 15 code examples of the SparkConf.setAppName method, ordered by popularity by default.
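Before the collected examples, here is a minimal, self-contained sketch of the pattern most of them share: build a SparkConf, call setAppName (and usually setMaster), then create a context from the configuration. The class name, the application name "MyExampleApp", and the local[2] master below are illustrative placeholders, not taken from any of the projects listed.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetAppNameSketch {
    public static void main(String[] args) {
        // setAppName labels the application in the Spark UI and logs;
        // the name and master used here are placeholder values.
        SparkConf conf = new SparkConf()
                .setAppName("MyExampleApp")
                .setMaster("local[2]");

        // JavaSparkContext implements Closeable, so try-with-resources works.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            long count = sc.parallelize(Arrays.asList(1, 2, 3, 4)).count();
            System.out.println("Count: " + count);
        }
    }
}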

Example 1: main

import org.apache.spark.SparkConf; // import the package/class the method depends on
public static void main(String[] args)
{
 SparkConf conf = new SparkConf();
 conf.setAppName("Wordcount Background");
 conf.setMaster("local");

 // Streaming context with a 15-second batch interval
 JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));

 // WORDS_EXTRACTOR, WORDS_MAPPER and WORDS_REDUCER are function objects
 // presumably defined elsewhere in the original class
 JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
 JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
 JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);

 counter.print();

 ssc.start();
 ssc.awaitTermination();

 // Batch (non-streaming) variant kept commented out by the original author,
 // using a JavaSparkContext named "context":
 /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
 JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
 JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
 counter.saveAsTextFile("/home/rahul/Desktop/wc");
 context.close();*/
}
 
Developer: arks-api, Project: arks-api, Lines: 30, Source: WordCount.java

Example 2: provide

import org.apache.spark.SparkConf; // import the package/class the method depends on
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // set serialization registration required if you want to make sure you registered all your classes
    // some spark internal classes will need to be registered as well
    // config.set("spark.kryo.registrationRequired", "true");


    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
 
Developer: Merck, Project: rdf2x, Lines: 26, Source: SparkContextProvider.java

Example 3: configureSparkContext

import org.apache.spark.SparkConf; // import the package/class the method depends on
private void configureSparkContext(Properties properties) {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("Write pipeline");
    sparkConf.set("spark.driver.allowMultipleContexts", "true");

    sparkConf.setMaster(properties.getProperty("spark.master"));
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

    sparkConf.set("spark.cassandra.connection.host", properties.getProperty("cassandra.nodes"));
    sparkConf.set("spark.cassandra.output.batch.size.bytes", properties.getProperty("cassandra.batch.size.bytes"));
    sparkConf.set("spark.cassandra.connection.port", properties.getProperty("cassandra.port"));

    sparkConf.set("es.nodes", properties.getProperty("elasticsearch.nodes") + ":" + properties.getProperty("elasticsearch.port.rest"));
    sparkConf.set("es.batch.size.entries", properties.getProperty("elasticsearch.batch.size.entries"));
    sparkConf.set("es.batch.size.bytes", properties.getProperty("elasticsearch.batch.size.bytes"));
    sparkConf.set("es.nodes.discovery", properties.getProperty("elasticsearch.nodes.dicovery"));

    sparkContext = new JavaSparkContext(sparkConf);
}
 
Developer: echauchot, Project: bigDataRocks, Lines: 20, Source: WritePipeline.java

Example 4: createContext

import org.apache.spark.SparkConf; // import the package/class the method depends on
@BeforeClass
public static void createContext() throws IOException {

	Configuration hdfsConfig = HDFSUtils.getConfiguration();
	SparkConf config = new SparkConf();
	config.setMaster("local[*]");
	config.setAppName("my JUnit running Spark");
	sc = new JavaSparkContext(config);
	fileSystem = FileSystem.get(hdfsConfig);
	sqlContext = new SQLContext(sc);
	engine = new ParquetRepartEngine(fileSystem, sqlContext);
}
 
Developer: pfratta, Project: ParquetUtils, Lines: 13, Source: ParquetRepartTest.java

Example 5: buildStreamingContext

import org.apache.spark.SparkConf; // import the package/class the method depends on
protected final JavaStreamingContext buildStreamingContext() {
  log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

  SparkConf sparkConf = new SparkConf();

  // Only for tests, really
  if (sparkConf.getOption("spark.master").isEmpty()) {
    log.info("Overriding master to {} for tests", streamingMaster);
    sparkConf.setMaster(streamingMaster);
  }
  // Only for tests, really
  if (sparkConf.getOption("spark.app.name").isEmpty()) {
    String appName = "Oryx" + getLayerName();
    if (id != null) {
      appName = appName + "-" + id;
    }
    log.info("Overriding app name to {} for tests", appName);
    sparkConf.setAppName(appName);
  }
  extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

  // Turn this down to prevent long blocking at shutdown
  sparkConf.setIfMissing(
      "spark.streaming.gracefulStopTimeout",
      Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
  sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
  long generationIntervalMS =
      TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
  return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
 
Developer: oncewang, Project: oryx2, Lines: 33, Source: AbstractSparkLayer.java

Example 6: getSparkConf

import org.apache.spark.SparkConf; // import the package/class the method depends on
public SparkConf getSparkConf() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
            config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
    sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());

    if (config.getLocalMode()) {
        sparkConf.setMaster("local[4]");
    }
    return sparkConf;
}
 
Developer: ameyamk, Project: spark-streaming-direct-kafka, Lines: 12, Source: AbstractSparkLayer.java

Example 7: main

import org.apache.spark.SparkConf; // import the package/class the method depends on
public static void main(String... args) {
  SparkConf conf = new SparkConf();
  conf.setMaster("local[2]");
  conf.setAppName("Spark Streaming Test Java");

  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

  processStream(ssc, sc);

  ssc.start();
  ssc.awaitTermination();
}
 
Developer: opencore, Project: kafka-spark-avro-example, Lines: 14, Source: SparkStreaming.java

Example 8: setupTest

import org.apache.spark.SparkConf; // import the package/class the method depends on
@After
@Before
public void setupTest() {
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(this.getClass().getCanonicalName() + "-setupTest");
    sparkConfiguration.set("spark.master", "local[4]");
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    sparkContext.close();
    Spark.create(sparkContext.sc());
    Spark.close();
    logger.info("SparkContext has been closed for " + this.getClass().getCanonicalName() + "-setupTest");
}
 
Developer: PKUSilvester, Project: LiteGraph, Lines: 13, Source: AbstractSparkTest.java

Example 9: createSparkContext

import org.apache.spark.SparkConf; // import the package/class the method depends on
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()){
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", BeamSparkRunnerRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Developer: apache, Project: beam, Lines: 28, Source: SparkContextFactory.java

Example 10: main

import org.apache.spark.SparkConf; // import the package/class the method depends on
public static void main(String args[]) {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("spark-phoenix-df");
    sparkConf.setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);

    DataFrame df = sqlContext.read()
            .format("org.apache.phoenix.spark")
            .option("table", "ORDERS")
            .option("zkUrl", "localhost:2181")
            .load();
    df.count();

}
 
Developer: mravi, Project: pro-phoenix, Lines: 16, Source: PhoenixSparkDf.java

Example 11: setup

import org.apache.spark.SparkConf; // import the package/class the method depends on
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
 
Developer: flipkart-incubator, Project: spark-transformers, Lines: 10, Source: SparkTestBase.java

Example 12: run

import org.apache.spark.SparkConf; // import the package/class the method depends on
public void run() throws IOException {
    FileSystem fs = DistributedFileSystem.get(new Configuration());
    Path inpath = new Path(input);
    Path outpath = new Path(output);
    if (!fs.exists(inpath)) {
        throw new IllegalArgumentException("Input file not found: " + inpath);
    }
    if (fs.exists(outpath)) {
        throw new IllegalArgumentException("Output file exists, Not overwriting it: " + inpath);
    }

    SparkConf conf = new SparkConf();
    conf.setMaster(sparkMaster);
    conf.setAppName(getClass().getSimpleName() + "::" + System.currentTimeMillis());
    JavaSparkContext ctx = new JavaSparkContext(conf);

    //STEP1: READ
    JavaPairRDD<Text, BytesWritable> rdd = ctx.sequenceFile(input, Text.class, BytesWritable.class);
            //.mapToPair(rec -> new Tuple2<>(new Text(rec._1()), new BytesWritable(rec._2().getBytes())));
    //STEP2: PARSE
    JavaPairRDD<Text, Metadata> parsedRDD = rdd.mapToPair(
            (PairFunction<Tuple2<Text, BytesWritable>, Text, Metadata>) rec -> {
                Metadata md = new Metadata();
                try (ByteArrayInputStream stream = new ByteArrayInputStream(rec._2().getBytes())) {
                    String content = TikaHolder.tika.parseToString(stream, md);
                    md.add("CONTENT", content);
                }
                return new Tuple2<>(rec._1(), md);
            });
    //STEP3: FORMAT
    JavaRDD<String> outRDD = parsedRDD.map((Function<Tuple2<Text, Metadata>, String>) rec -> {
        String key = rec._1().toString();
        Metadata metadata = rec._2();
        JSONObject object = new JSONObject();
        for (String name : metadata.names()) {
            if (metadata.isMultiValued(name)) {
                JSONArray arr = new JSONArray();
                for (String val : metadata.getValues(name)) {
                    arr.add(val);
                }
                object.put(name, arr);
            } else {
                object.put(name, metadata.get(name));
            }
        }
        return key + "\t\t" + object.toJSONString();
    });
    //STEP4: SAVE
    LOG.info("Saving at " + outpath);
    outRDD.saveAsTextFile(output);
    LOG.info("Stopping");
    ctx.stop();
}
 
Developer: thammegowda, Project: tika-dl4j-spark-imgrec, Lines: 54, Source: TikaSpark.java

Example 13: execute

import org.apache.spark.SparkConf; // import the package/class the method depends on
@SuppressWarnings("deprecation")
private void execute() {
	SparkConf conf = new SparkConf();
	conf.setAppName("cassandra-spark-poc");
	conf.setMaster("local[*]");

	SparkContext sparkContext = new SparkContext(conf);

	System.out.println(sparkContext);

	SparkSession sparkSession = SparkSession.builder().appName("cassandra-spark-poc").master("local[*]")
			.getOrCreate();

	SQLContext sqlContext = new SQLContext(sparkSession);

	Map<String, String> options = new HashMap<String, String>();
	options.put("keyspace", "wootag");
	options.put("table", "video_view");

	Dataset<Row> dataset = sqlContext.read().format("org.apache.spark.sql.cassandra").options(options).load()
			.cache();

	dataset.registerTempTable("temptable");

	String query = "select video_id, view_duration_in_second, count(*) from temptable group by 1, 2";

	List<Row> collectAsList = sqlContext.sql(query).collectAsList();
	for (Row row : collectAsList) {
		System.out.println(row.get(0) + "," + row.get(1) + "," + row.get(2));
	}

	// sqlContext.sql(query).show(1000);

	long startTime = 1485907200000L;
	long endTime = 1487226374000L;

	for (long i = startTime; i <= endTime; i = i + TimeUnit.DAYS.toMillis(1)) {

		dataset.filter(new Column("event_start_timestamp").geq(i))
				.filter(new Column("event_start_timestamp").leq(i + TimeUnit.DAYS.toMillis(1)))
				.groupBy(new Column("view_duration_in_second"), new Column("video_id")).count()
				.orderBy("view_duration_in_second").show(1000);
		sleepDelay();
		
	}

}
 
Developer: alokawi, Project: spark-cassandra-poc, Lines: 48, Source: SparkCassandraUtils.java

Example 14: create

import org.apache.spark.SparkConf; // import the package/class the method depends on
public static void create(final Configuration configuration) {
    final SparkConf sparkConf = new SparkConf();
    configuration.getKeys().forEachRemaining(key -> sparkConf.set(key, configuration.getProperty(key).toString()));
    sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
    CONTEXT = SparkContext.getOrCreate(sparkConf);
}
 
Developer: PKUSilvester, Project: LiteGraph, Lines: 7, Source: Spark.java

Example 15: shouldSetThreadLocalProperties

import org.apache.spark.SparkConf; // import the package/class the method depends on
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class) + UUID.randomUUID().toString();
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.NOTHING)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}
 
Developer: PKUSilvester, Project: LiteGraph, Lines: 50, Source: LocalPropertyTest.java


Note: The org.apache.spark.SparkConf.setAppName examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by many developers; copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.