This article collects typical usage examples of the Java class org.apache.spark.SparkContext. If you are unsure what the SparkContext class is for or how to use it, the selected code examples below should help.
The SparkContext class belongs to the org.apache.spark package. Fifteen code examples are shown below, ordered by popularity by default.
Example 1: doOperation
import org.apache.spark.SparkContext; // import the required package/class
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
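For orientation, the Scala RDD<Element> returned above can be wrapped in a JavaRDD when the caller lives in Java code. The snippet below is a minimal, hypothetical usage sketch; the operation, context and accumuloStore variables are assumed to exist in the caller:

import org.apache.spark.api.java.JavaRDD; // for working with the result from Java

// Hypothetical caller; relies on the handler method shown in Example 1.
RDD<Element> elementRdd = doOperation(operation, context, accumuloStore);
JavaRDD<Element> javaElementRdd = elementRdd.toJavaRDD();
long elementCount = javaElementRdd.count(); // e.g. count the fetched elements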
Example 2: PredictWithModel
import org.apache.spark.SparkContext; // import the required package/class
@SuppressWarnings("unchecked")
public PredictWithModel(String modelName, String modelPath, String testFile, int numClasses, int minPartition, double threshold, SparkContext sc){
    this.numClasses = numClasses;
    this.threshold = threshold;
    if(modelName.equals("LogisticRegressionModel")){
        LogisticRegressionModel lrmodel = LogisticRegressionModel.load(sc, modelPath);
        this.model = (T)(Object) lrmodel;
    }
    else if(modelName.equals("SVMModel")){
        SVMModel svmmodel = SVMModel.load(sc, modelPath);
        this.model = (T)(Object) svmmodel;
    }
    else if(modelName.equals("NaiveBayesModel")){
        NaiveBayesModel bayesmodel = NaiveBayesModel.load(sc, modelPath);
        this.model = (T)(Object) bayesmodel;
    }
    // Load testing data
    LoadProcess loadProcess = new LoadProcess(sc, minPartition);
    testingData = loadProcess.load(testFile, "Vector");
    testingData.cache();
}
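A hypothetical call of this constructor might look as follows; the model and data paths, partition count and threshold are placeholders, not values from the original project:

SparkContext sc = SparkContext.getOrCreate();
// All arguments below are illustrative placeholders; raw type used for brevity.
PredictWithModel predictor =
        new PredictWithModel("SVMModel", "/models/svm", "/data/test.libsvm", 2, 4, 0.5, sc);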
Example 3: initializeVariables
import org.apache.spark.SparkContext; // import the required package/class
public void initializeVariables(SparkContext sc) {
    for (int i = 0; i < this.numberOfCluster; i++) {
        longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
        clusterCounterMaliciousList.add(longAccumulator);
        longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
        clusterCounterBenignList.add(longAccumulator);
    }
    longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
    totalBenign = sc.accumulator(0L, new LongAccumulatorParam());
    totalMalicious = sc.accumulator(0L, new LongAccumulatorParam());
    totalNanoSeconds = sc.accumulator(0L, "totalNanoSeconds", new LongAccumulatorParam());
    flowCounterMalicious = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
    flowCounterBenign = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
}
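Note that SparkContext.accumulator(...) with a custom AccumulatorParam is deprecated since Spark 2.0. A minimal sketch of the same long counters with the newer AccumulatorV2 API could look like this (names are placeholders):

import org.apache.spark.SparkContext;
import org.apache.spark.util.LongAccumulator;

// Sketch only: built-in named long accumulators instead of LongAccumulatorParam.
LongAccumulator totalBenign = sc.longAccumulator("totalBenign");
LongAccumulator totalMalicious = sc.longAccumulator("totalMalicious");
totalBenign.add(1L);                    // incremented inside tasks
long benignSoFar = totalBenign.value(); // read back on the driver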
Example 4: main
import org.apache.spark.SparkContext; // import the required package/class
/**
 * @param args
 * @throws SQLException
 */
public static void main(String[] args) throws SQLException {
    if (args.length == 0) {
        System.out.println("Usage: ImpalaSparkJDBC <url> <tableName>");
        System.out.println(" (secure) jdbc:impala://impala-host:21050/;AuthMech=1;KrbRealm=realm;KrbHostFQDN=krbHost;KrbServiceName=impala");
        System.out.println(" (insecure) jdbc:hive2://impala-host:21050/;auth=noSasl");
        System.exit(1);
    }
    Properties prop = new Properties();
    prop.setProperty("driver", "com.cloudera.impala.jdbc41.Driver");
    System.setProperty("java.security.auth.login.config", "jaas.conf");
    System.setProperty("sun.security.jgss.debug", "true");
    System.setProperty("javax.security.auth.useSubjectCredsOnly", "false");
    SparkConf sparkConf = new SparkConf().setAppName("ImpalaJDBC");
    SparkContext sc = new SparkContext(sparkConf);
    SQLContext sqlContext = SQLContext.getOrCreate(sc);
    sqlContext.read().jdbc(args[0], args[1], prop).show();
}
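On Spark 2.x the JDBC read returns a Dataset<Row>, so the result can be filtered before display. A hedged sketch, where the column name is a placeholder rather than anything from the original table:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// Illustrative only: apply a simple filter before showing the table.
// "some_column" is a placeholder, not a column from the original example.
Dataset<Row> df = sqlContext.read().jdbc(args[0], args[1], prop);
df.where("some_column IS NOT NULL").show(20);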
Example 5: checkVersion
import org.apache.spark.SparkContext; // import the required package/class
static
public void checkVersion(){
    SparkContext sparkContext = SparkContext.getOrCreate();
    int[] version = parseVersion(sparkContext.version());
    if(!Arrays.equals(ConverterUtil.VERSION, version)){
        throw new IllegalArgumentException("Expected Apache Spark ML version " + formatVersion(ConverterUtil.VERSION) + ", got version " + formatVersion(version) + " (" + sparkContext.version() + ")");
    }
}
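parseVersion and formatVersion are helpers of the surrounding converter class and are not shown on this page. A hypothetical parseVersion that keeps the major.minor prefix of SparkContext.version() might look like this; the real helper in the original project may differ:

// Hypothetical helper: "2.3.1" -> {2, 3}; not the project's actual implementation.
static int[] parseVersion(String version) {
    String[] parts = version.split("\\.");
    return new int[]{Integer.parseInt(parts[0]), Integer.parseInt(parts[1])};
}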
Example 6: Bounded
import org.apache.spark.SparkContext; // import the required package/class
public Bounded(
        SparkContext sc,
        BoundedSource<T> source,
        SerializablePipelineOptions options,
        String stepName) {
    super(sc, NIL, JavaSparkContext$.MODULE$.<WindowedValue<T>>fakeClassTag());
    this.source = source;
    this.options = options;
    // the input parallelism is determined by Spark's scheduler backend.
    // when running on YARN/SparkDeploy it's the result of max(totalCores, 2).
    // when running on Mesos it's 8.
    // when running local it's the total number of cores (local = 1, local[N] = N,
    // local[*] = estimation of the machine's cores).
    // ** the configuration "spark.default.parallelism" takes precedence over all of the above **
    this.numPartitions = sc.defaultParallelism();
    checkArgument(this.numPartitions > 0, "Number of partitions must be greater than zero.");
    this.stepName = stepName;
    this.metricsAccum = MetricsAccumulator.getInstance();
}
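As the comments note, "spark.default.parallelism" overrides the scheduler-backend defaults. An illustrative way to pin it explicitly (the app name and value are placeholders):

import org.apache.spark.SparkConf;

// Illustrative only: force a fixed default parallelism for the whole application.
SparkConf sparkConf = new SparkConf()
        .setAppName("beam-on-spark")             // placeholder app name
        .set("spark.default.parallelism", "8");  // overrides the backend defaults above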
Example 7: run
import org.apache.spark.SparkContext; // import the required package/class
public void run() {
    long microsLower = day * 1000;
    long microsUpper = (day * 1000) + TimeUnit.DAYS.toMicros(1) - 1;
    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", dateStamp, microsLower,
            microsUpper);
    SparkContext sc = new SparkContext(conf);
    List<DependencyLink> links = javaFunctions(sc).cassandraTable(keyspace, "traces")
            .spanBy(r -> r.getLong("trace_id"), Long.class)
            .flatMapValues(new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper))
            .values()
            .mapToPair(link -> new Tuple2<>(new Tuple2<>(link.parent, link.child), link))
            .reduceByKey((l, r) -> DependencyLink.builder()
                    .parent(l.parent)
                    .child(l.child)
                    .callCount(l.callCount + r.callCount)
                    .errorCount(l.errorCount + r.errorCount).build())
            .values().collect();
    sc.stop();
    saveToCassandra(links);
}
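For context, javaFunctions(sc) is the Java entry point of the Spark–Cassandra connector, which the original class presumably brings in via a static import along these lines:

import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;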
Example 8: RFileReaderRDD
import org.apache.spark.SparkContext; // import the required package/class
public RFileReaderRDD(final SparkContext sparkContext,
                      final String instanceName,
                      final String zookeepers,
                      final String user,
                      final String password,
                      final String tableName,
                      final Set<String> auths,
                      final byte[] serialisedConfiguration) {
    super(sparkContext, JavaConversions.asScalaBuffer(new ArrayList<>()),
            ClassTag$.MODULE$.apply(Map.Entry.class));
    this.instanceName = instanceName;
    this.zookeepers = zookeepers;
    this.user = user;
    this.password = password;
    this.tableName = tableName;
    this.auths = auths;
    this.serialisedConfiguration = serialisedConfiguration;
}
Example 9: doOperation
import org.apache.spark.SparkContext; // import the required package/class
public void doOperation(final ImportJavaRDDOfElements operation, final Context context, final AccumuloStore store) throws OperationException {
    final String outputPath = operation.getOption(OUTPUT_PATH);
    if (null == outputPath || outputPath.isEmpty()) {
        throw new OperationException("Option outputPath must be set for this option to be run against the accumulostore");
    }
    final String failurePath = operation.getOption(FAILURE_PATH);
    if (null == failurePath || failurePath.isEmpty()) {
        throw new OperationException("Option failurePath must be set for this option to be run against the accumulostore");
    }
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, store.getProperties()).sparkContext();
    final Broadcast<AccumuloElementConverter> broadcast = JavaSparkContext.fromSparkContext(sparkContext).broadcast(store.getKeyPackage().getKeyConverter());
    final ElementConverterFunction func = new ElementConverterFunction(broadcast);
    final JavaPairRDD<Key, Value> rdd = operation.getInput().flatMapToPair(func);
    final ImportKeyValueJavaPairRDDToAccumulo op =
            new ImportKeyValueJavaPairRDDToAccumulo.Builder()
                    .input(rdd)
                    .failurePath(failurePath)
                    .outputPath(outputPath)
                    .build();
    store.execute(new OperationChain(op), context);
}
Example 10: createSparkContext
import org.apache.spark.SparkContext; // import the required package/class
/**
 * Helper method for creating the spark context from the given cognition configuration
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
    SparkConf conf = new SparkConf();
    Configuration config = cognition.getProperties();
    conf.set("spark.serializer", KryoSerializer.class.getName());
    conf.setAppName(config.getString("app.name"));
    conf.setMaster(config.getString("master"));
    Iterator<String> iterator = config.getKeys("spark");
    while (iterator.hasNext()) {
        String key = iterator.next();
        conf.set(key, config.getString(key));
    }
    SparkContext sc = new SparkContext(conf);
    for (String jar : config.getStringArray("jars")) {
        sc.addJar(jar);
    }
    return sc;
}
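A hypothetical caller of this helper would typically stop the context once the driver is finished:

// Sketch only: build the context, run jobs, release resources afterwards.
SparkContext sc = createSparkContext();
try {
    // ... submit Spark jobs against sc ...
} finally {
    sc.stop(); // a SparkContext should be stopped exactly once per driver
}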
Example 11: exportStatFiles
import org.apache.spark.SparkContext; // import the required package/class
@Override
public void exportStatFiles(String outputPath, SparkContext sc) throws IOException {
    String d = DEFAULT_DELIMITER;
    //Total time stats (includes total example counts)
    String totalTimeStatsPath = FilenameUtils.concat(outputPath, FILENAME_TOTAL_TIME_STATS);
    StatsUtils.exportStats(workerFlatMapTotalTimeMs, totalTimeStatsPath, d, sc);
    //"Get initial model" stats:
    String getInitialModelStatsPath = FilenameUtils.concat(outputPath, FILENAME_GET_INITIAL_MODEL_STATS);
    StatsUtils.exportStats(workerFlatMapGetInitialModelTimeMs, getInitialModelStatsPath, d, sc);
    //"DataSet get time" stats:
    String getDataSetStatsPath = FilenameUtils.concat(outputPath, FILENAME_DATASET_GET_TIME_STATS);
    StatsUtils.exportStats(workerFlatMapDataSetGetTimesMs, getDataSetStatsPath, d, sc);
    //Process minibatch time stats:
    String processMiniBatchStatsPath = FilenameUtils.concat(outputPath, FILENAME_PROCESS_MINIBATCH_TIME_STATS);
    StatsUtils.exportStats(workerFlatMapProcessMiniBatchTimesMs, processMiniBatchStatsPath, d, sc);
    if (trainingWorkerSpecificStats != null)
        trainingWorkerSpecificStats.exportStatFiles(outputPath, sc);
}
Example 12: submitBatch
import org.apache.spark.SparkContext; // import the required package/class
private void submitBatch(int batch, int batchSize, int numPartitions, final JavaRDD<String> streamed, final Properties properties) {
    final List<Integer> list = new ArrayList<>();
    for (int j = batch*batchSize; j < numPartitions && j < (batch+1)*batchSize; j++) {
        list.add(j);
    }
    if (LOG.isTraceEnabled())
        LOG.trace("Submitting batch " + batch + " with partitions " + list);
    final Seq objects = JavaConversions.asScalaBuffer(list).toList();
    completionService.submit(new Callable<Object>() {
        @Override
        public Object call() {
            SparkContext sc = SpliceSpark.getContextUnsafe().sc();
            sc.setLocalProperties(properties);
            String[] results = (String[]) sc.runJob(streamed.rdd(), new FunctionAdapter(), objects, tag);
            for (String o2 : results) {
                if ("STOP".equals(o2)) {
                    return "STOP";
                }
            }
            return "CONTINUE";
        }
    });
}
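Local properties on a SparkContext only affect jobs submitted from the current thread, which is presumably why each Callable re-applies them before calling runJob. A common, illustrative use of the same mechanism is routing jobs to a fair-scheduler pool (the pool name below is a placeholder):

// Illustrative only: local properties apply to jobs submitted from the current thread.
sc.setLocalProperty("spark.scheduler.pool", "batch-pool");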
Example 13: TrainModel
import org.apache.spark.SparkContext; // import the required package/class
public TrainModel(SparkContext sc, String trainFile, String validFile, int numClasses, String modelName, int colIdx){
    this.trainFile = trainFile;
    this.validFile = validFile;
    this.numClasses = numClasses;
    this.modelName = modelName;
    // Load training/validate data
    LoadProcess loadProcess = new LoadProcess(sc, this.minPartition);
    trainingData = loadProcess.load(trainFile, "LabeledPoint", colIdx);
    trainingData.cache();
    validData = loadProcess.load(validFile, "LabeledPoint", colIdx);
    validData.cache();
}
Example 14: prepareVideoRDD
import org.apache.spark.SparkContext; // import the required package/class
private JavaRDD<VideoViewEvent> prepareVideoRDD(String localFilePath, SparkContext sparkContext) {
    JavaRDD<VideoViewEvent> videoEventRDD = sparkContext.textFile(localFilePath, 2).toJavaRDD()
            .map(new Function<String, VideoViewEvent>() {
                private static final long serialVersionUID = 1L;

                @Override
                public VideoViewEvent call(String line) throws Exception {
                    return new Gson().fromJson(line, VideoViewEvent.class);
                }
            });
    return videoEventRDD;
}
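Since JavaRDD.map takes a functional interface, the anonymous Function above can also be written as a Java 8 lambda. A behaviour-preserving sketch:

// Sketch: the same mapping with a lambda; Gson still parses one JSON line per record.
JavaRDD<VideoViewEvent> videoEventRDD = sparkContext.textFile(localFilePath, 2)
        .toJavaRDD()
        .map(line -> new Gson().fromJson(line, VideoViewEvent.class));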
Example 15: initializeVariables
import org.apache.spark.SparkContext; // import the required package/class
public void initializeVariables(SparkContext sc) {
    for (int i = 0; i < this.numberOfLabels; i++) {
        longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
        classificationCounterValidatedList.add(longAccumulator);
        longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
        classificationCounterOriginList.add(longAccumulator);
        // unique entries
        flowCounter = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
        uniqueOriginEntires.add(flowCounter);
        flowCounter = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
        uniqueValidatedEntires.add(flowCounter);
    }
    longAccumulator = sc.accumulator(0L, new LongAccumulatorParam());
    totalBenign = sc.accumulator(0L, new LongAccumulatorParam());
    totalMalicious = sc.accumulator(0L, new LongAccumulatorParam());
    truePositive = sc.accumulator(0L, new LongAccumulatorParam());
    falseNegative = sc.accumulator(0L, new LongAccumulatorParam());
    falsePositive = sc.accumulator(0L, new LongAccumulatorParam());
    trueNegative = sc.accumulator(0L, new LongAccumulatorParam());
    totalNanoSeconds = sc.accumulator(0L, "totalNanoSeconds", new LongAccumulatorParam());
    flowCounterMalicious = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
    flowCounterBenign = sc.accumulator(new HashMap<BigInteger, Boolean>(), new UniqueFlowAccumulatorParam());
}