This article collects typical usage examples of the Java method org.apache.spark.SparkConf.setAppName. If you are wondering what SparkConf.setAppName does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.SparkConf.
The sections below present 15 code examples of the SparkConf.setAppName method, sorted by popularity by default.
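Before the examples, here is a minimal, self-contained sketch of the basic pattern. The class name AppNameExample and the application name are illustrative and not taken from any of the projects below; setAppName simply sets the spark.app.name configuration key, which is what appears in the Spark UI and in logs.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class AppNameExample {
    public static void main(String[] args) {
        // setAppName("...") is equivalent to set("spark.app.name", "...").
        SparkConf conf = new SparkConf();
        conf.setAppName("AppNameExample"); // illustrative application name
        conf.setMaster("local[*]");        // run locally for this sketch

        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // The configured name is now visible via the conf and in the Spark UI.
            System.out.println(conf.get("spark.app.name"));
        }
    }
}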
Example 1: main
import org.apache.spark.SparkConf; // import the package/class this method depends on
public static void main(String[] args)
{
    SparkConf conf = new SparkConf();
    conf.setAppName("Wordcount Background");
    conf.setMaster("local");

    // Streaming word count over files appearing in the directory, in 15-second micro-batches.
    JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));
    JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
    JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
    JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.print();
    ssc.start();
    ssc.awaitTermination();

    /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
    JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
    JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
    JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
    counter.saveAsTextFile("/home/rahul/Desktop/wc");
    context.close();*/
}
Example 2: provide
import org.apache.spark.SparkConf; // import the package/class this method depends on
/**
* Provide a {@link JavaSparkContext} based on default settings
*
* @return a {@link JavaSparkContext} based on default settings
*/
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());
    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }
    // Set spark.kryo.registrationRequired to make sure all your classes are registered;
    // some Spark internal classes will need to be registered as well.
    // config.set("spark.kryo.registrationRequired", "true");
    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
Example 3: configureSparkContext
import org.apache.spark.SparkConf; // import the package/class this method depends on
private void configureSparkContext(Properties properties) {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("Write pipeline");
    sparkConf.set("spark.driver.allowMultipleContexts", "true");
    sparkConf.setMaster(properties.getProperty("spark.master"));
    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.cassandra.connection.host", properties.getProperty("cassandra.nodes"));
    sparkConf.set("spark.cassandra.output.batch.size.bytes", properties.getProperty("cassandra.batch.size.bytes"));
    sparkConf.set("spark.cassandra.connection.port", properties.getProperty("cassandra.port"));
    sparkConf.set("es.nodes", properties.getProperty("elasticsearch.nodes") + ":" + properties.getProperty("elasticsearch.port.rest"));
    sparkConf.set("es.batch.size.entries", properties.getProperty("elasticsearch.batch.size.entries"));
    sparkConf.set("es.batch.size.bytes", properties.getProperty("elasticsearch.batch.size.bytes"));
    sparkConf.set("es.nodes.discovery", properties.getProperty("elasticsearch.nodes.dicovery"));
    sparkContext = new JavaSparkContext(sparkConf);
}
Example 4: createContext
import org.apache.spark.SparkConf; // import the package/class this method depends on
@BeforeClass
public static void createContext() throws IOException {
    Configuration hdfsConfig = HDFSUtils.getConfiguration();
    SparkConf config = new SparkConf();
    config.setMaster("local[*]");
    config.setAppName("my JUnit running Spark");
    sc = new JavaSparkContext(config);
    fileSystem = FileSystem.get(hdfsConfig);
    sqlContext = new SQLContext(sc);
    engine = new ParquetRepartEngine(fileSystem, sqlContext);
}
Example 5: buildStreamingContext
import org.apache.spark.SparkConf; // import the package/class this method depends on
protected final JavaStreamingContext buildStreamingContext() {
    log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

    SparkConf sparkConf = new SparkConf();

    // Only for tests, really
    if (sparkConf.getOption("spark.master").isEmpty()) {
        log.info("Overriding master to {} for tests", streamingMaster);
        sparkConf.setMaster(streamingMaster);
    }
    // Only for tests, really
    if (sparkConf.getOption("spark.app.name").isEmpty()) {
        String appName = "Oryx" + getLayerName();
        if (id != null) {
            appName = appName + "-" + id;
        }
        log.info("Overriding app name to {} for tests", appName);
        sparkConf.setAppName(appName);
    }
    extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

    // Turn this down to prevent long blocking at shutdown
    sparkConf.setIfMissing(
            "spark.streaming.gracefulStopTimeout",
            Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
    sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));

    long generationIntervalMS =
            TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
    return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
Example 6: getSparkConf
import org.apache.spark.SparkConf; // import the package/class this method depends on
public SparkConf getSparkConf() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
            config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
    sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());
    if (config.getLocalMode()) {
        sparkConf.setMaster("local[4]");
    }
    return sparkConf;
}
Example 7: main
import org.apache.spark.SparkConf; // import the package/class this method depends on
public static void main(String... args) {
    SparkConf conf = new SparkConf();
    conf.setMaster("local[2]");
    conf.setAppName("Spark Streaming Test Java");

    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

    processStream(ssc, sc);

    ssc.start();
    ssc.awaitTermination();
}
Example 8: setupTest
import org.apache.spark.SparkConf; // import the package/class this method depends on
@After
@Before
public void setupTest() {
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(this.getClass().getCanonicalName() + "-setupTest");
    sparkConfiguration.set("spark.master", "local[4]");
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    sparkContext.close();
    Spark.create(sparkContext.sc());
    Spark.close();
    logger.info("SparkContext has been closed for " + this.getClass().getCanonicalName() + "-setupTest");
}
Example 9: createSparkContext
import org.apache.spark.SparkConf; // import the package/class this method depends on
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
    if (usesProvidedSparkContext) {
        LOG.info("Using a provided Spark Context");
        JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
        if (jsc == null || jsc.sc().isStopped()) {
            LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
            throw new RuntimeException("The provided Spark context was not created or was stopped");
        }
        return jsc;
    } else {
        LOG.info("Creating a brand new Spark Context.");
        SparkConf conf = new SparkConf();
        if (!conf.contains("spark.master")) {
            // set master if not set.
            conf.setMaster(contextOptions.getSparkMaster());
        }
        if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
            conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
        }
        conf.setAppName(contextOptions.getAppName());
        // register immutable collections serializers because the SDK uses them.
        conf.set("spark.kryo.registrator", BeamSparkRunnerRegistrator.class.getName());
        return new JavaSparkContext(conf);
    }
}
Example 10: main
import org.apache.spark.SparkConf; // import the package/class this method depends on
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setAppName("spark-phoenix-df");
    sparkConf.setMaster("local[*]");

    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);

    DataFrame df = sqlContext.read()
            .format("org.apache.phoenix.spark")
            .option("table", "ORDERS")
            .option("zkUrl", "localhost:2181")
            .load();
    df.count();
}
Example 11: setup
import org.apache.spark.SparkConf; // import the package/class this method depends on
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
Example 12: run
import org.apache.spark.SparkConf; // import the package/class this method depends on
public void run() throws IOException {
    FileSystem fs = DistributedFileSystem.get(new Configuration());
    Path inpath = new Path(input);
    Path outpath = new Path(output);
    if (!fs.exists(inpath)) {
        throw new IllegalArgumentException("Input file not found: " + inpath);
    }
    if (fs.exists(outpath)) {
        throw new IllegalArgumentException("Output file exists, not overwriting it: " + outpath);
    }

    SparkConf conf = new SparkConf();
    conf.setMaster(sparkMaster);
    conf.setAppName(getClass().getSimpleName() + "::" + System.currentTimeMillis());
    JavaSparkContext ctx = new JavaSparkContext(conf);

    //STEP1: READ
    JavaPairRDD<Text, BytesWritable> rdd = ctx.sequenceFile(input, Text.class, BytesWritable.class);
    //.mapToPair(rec -> new Tuple2<>(new Text(rec._1()), new BytesWritable(rec._2().getBytes())));

    //STEP2: PARSE
    JavaPairRDD<Text, Metadata> parsedRDD = rdd.mapToPair(
            (PairFunction<Tuple2<Text, BytesWritable>, Text, Metadata>) rec -> {
                Metadata md = new Metadata();
                try (ByteArrayInputStream stream = new ByteArrayInputStream(rec._2().getBytes())) {
                    String content = TikaHolder.tika.parseToString(stream, md);
                    md.add("CONTENT", content);
                }
                return new Tuple2<>(rec._1(), md);
            });

    //STEP3: FORMAT
    JavaRDD<String> outRDD = parsedRDD.map((Function<Tuple2<Text, Metadata>, String>) rec -> {
        String key = rec._1().toString();
        Metadata metadata = rec._2();
        JSONObject object = new JSONObject();
        for (String name : metadata.names()) {
            if (metadata.isMultiValued(name)) {
                JSONArray arr = new JSONArray();
                for (String val : metadata.getValues(name)) {
                    arr.add(val);
                }
                object.put(name, arr);
            } else {
                object.put(name, metadata.get(name));
            }
        }
        return key + "\t\t" + object.toJSONString();
    });

    //STEP4: SAVE
    LOG.info("Saving at " + outpath);
    outRDD.saveAsTextFile(output);
    LOG.info("Stopping");
    ctx.stop();
}
Example 13: execute
import org.apache.spark.SparkConf; // import the package/class this method depends on
@SuppressWarnings("deprecation")
private void execute() {
SparkConf conf = new SparkConf();
conf.setAppName("cassandra-spark-poc");
conf.setMaster("local[*]");
SparkContext sparkContext = new SparkContext(conf);
System.out.println(sparkContext);
SparkSession sparkSession = SparkSession.builder().appName("cassandra-spark-poc").master("local[*]")
.getOrCreate();
SQLContext sqlContext = new SQLContext(sparkSession);
Map<String, String> options = new HashMap<String, String>();
options.put("keyspace", "wootag");
options.put("table", "video_view");
Dataset<Row> dataset = sqlContext.read().format("org.apache.spark.sql.cassandra").options(options).load()
.cache();
dataset.registerTempTable("temptable");
String query = "select video_id, view_duration_in_second, count(*) from temptable group by 1, 2";
List<Row> collectAsList = sqlContext.sql(query).collectAsList();
for (Row row : collectAsList) {
System.out.println(row.get(0) + "," + row.get(1) + "," + row.get(2));
}
// sqlContext.sql(query).show(1000);
long startTime = 1485907200000L;
long endTime = 1487226374000L;
for (long i = startTime; i <= endTime; i = i + TimeUnit.DAYS.toMillis(1)) {
dataset.filter(new Column("event_start_timestamp").geq(i))
.filter(new Column("event_start_timestamp").leq(i + TimeUnit.DAYS.toMillis(1)))
.groupBy(new Column("view_duration_in_second"), new Column("video_id")).count()
.orderBy("view_duration_in_second").show(1000);
sleepDelay();
}
}
Example 14: create
import org.apache.spark.SparkConf; // import the package/class this method depends on
public static void create(final Configuration configuration) {
    final SparkConf sparkConf = new SparkConf();
    configuration.getKeys().forEachRemaining(key -> sparkConf.set(key, configuration.getProperty(key).toString()));
    sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
    CONTEXT = SparkContext.getOrCreate(sparkConf);
}
Example 15: shouldSetThreadLocalProperties
import org.apache.spark.SparkConf; // import the package/class this method depends on
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class) + UUID.randomUUID().toString();

    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");

    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, false);
    configuration.setProperty("spark.jobGroup.id", "44");

    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.NOTHING)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}