This article compiles typical usage examples of the Java method org.apache.spark.api.java.JavaSparkContext.close. If you are wondering what JavaSparkContext.close does, how to call it, or what real-world usage looks like, the curated method examples below should help. You can also explore further usage of the enclosing class, org.apache.spark.api.java.JavaSparkContext.
The following presents 7 code examples of JavaSparkContext.close, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
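Before the examples, one general note: in recent Spark versions JavaSparkContext implements java.io.Closeable, so close() can also be invoked implicitly through try-with-resources. The following is a minimal sketch under that assumption; the class name, master URL, application name, and the tiny in-memory dataset are placeholders, not taken from the examples below.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class CloseWithTryWithResources {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("close-sketch");
        // The context is closed automatically when the block exits, even if the job throws.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            long count = sc.parallelize(Arrays.asList("a", "b", "c")).count();
            System.out.println("count = " + count);
        }
    }
}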
Example 1: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    System.out.println(System.getProperty("hadoop.home.dir"));
    String inputPath = args[0];
    String outputPath = args[1];
    FileUtils.deleteQuietly(new File(outputPath));

    JavaSparkContext sc = new JavaSparkContext("local", "sparkwordcount");

    // classic word count: split lines into words, map to (word, 1), reduce by key
    JavaRDD<String> rdd = sc.textFile(inputPath);
    JavaPairRDD<String, Integer> counts = rdd
            .flatMap(x -> Arrays.asList(x.split(" ")).iterator())
            .mapToPair(x -> new Tuple2<String, Integer>(x, 1))
            .reduceByKey((x, y) -> x + y);
    counts.saveAsTextFile(outputPath);

    sc.close();
}
Example 2: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) {
    String inputFile = "data/dummy.txt";
    SparkConf configuration = new SparkConf().setMaster("local[4]").setAppName("My App");
    JavaSparkContext sparkContext = new JavaSparkContext(configuration);

    JavaRDD<String> logData = sparkContext.textFile(inputFile).cache();

    // count() is an action, so the job is executed here and the result is a plain long
    long numberA = logData.filter(new Function<String, Boolean>() {
        private static final long serialVersionUID = 1L;

        public Boolean call(String s) {
            return s.length() == 0;
        }
    }).count();

    // closing before the println is safe: the count has already been materialized
    sparkContext.close();
    System.out.println("Empty Lines: " + numberA);
}
Example 3: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) {
    System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
    SparkConf conf = new SparkConf().setMaster("local").setAppName("Local File System Example");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // jsc.hadoopConfiguration().setLong("dfs.block.size", 20000);
    // jsc.hadoopConfiguration().setLong("fs.local.block.size", 20000);
    //
    // JavaRDD<String> localFile = jsc.textFile("C:\\Users\\sgulati\\Documents\\Result\\test\\a.txt");
    // localFile.flatMap(x -> Arrays.asList(x.split(" ")).iterator())
    //         .mapToPair(x -> new Tuple2<String, Integer>((String) x, 1))
    //         .reduceByKey((x, y) -> x + y)
    //         .saveAsTextFile("C:\\Users\\sgulati\\Documents\\Result\\out_path");
    //
    // JavaRDD<String> localFile1 = jsc.textFile("C:\\Users\\sgulati\\Documents\\Result\\test\\a.txt,C:\\Users\\sgulati\\Documents\\Result\\test\\b.txt");
    // System.out.println(localFile1.getNumPartitions());
    // localFile1.flatMap(x -> Arrays.asList(x.split(" ")).iterator())
    //         .mapToPair(x -> new Tuple2<String, Integer>((String) x, 1))
    //         .reduceByKey((x, y) -> x + y)
    //         .saveAsTextFile("C:\\Users\\sgulati\\Documents\\Result\\out_path1");

    JavaRDD<String> localFile2 = jsc.textFile("C:\\Users\\sgulati\\Documents\\Result\\test\\*");
    System.out.println(localFile2.getNumPartitions());

    // localFile2.flatMap(x -> Arrays.asList(x.split(" ")).iterator())
    //         .mapToPair(x -> new Tuple2<String, Integer>((String) x, 1))
    //         .reduceByKey((x, y) -> x + y)
    //         .saveAsTextFile("C:\\Users\\sgulati\\Documents\\Result\\out_path2");
    //
    // JavaRDD<String> localFile3 = jsc.textFile("C:\\Users\\sgulati\\Documents\\Result\\test\\*,C:\\Users\\sgulati\\Documents\\Result\\test5\\*");
    // localFile3.flatMap(x -> Arrays.asList(x.split(" ")).iterator())
    //         .mapToPair(x -> new Tuple2<String, Integer>((String) x, 1))
    //         .reduceByKey((x, y) -> x + y)
    //         .saveAsTextFile("C:\\Users\\sgulati\\Documents\\Result\\out_path3");
    //
    // JavaPairRDD<String, String> localFileWhole = jsc.wholeTextFiles("C:\\Users\\sgulati\\Documents\\Result\\test\\a.txt,C:\\Users\\sgulati\\Documents\\Result\\test\\b.txt");
    // System.out.println(localFileWhole.collect());

    jsc.close();
}
Example 4: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) {
    Dataset<Row> mutations = DataProvider.getMutationsToStructures();
    List<String> pdbIds = mutations.select(col("pdbId"))
            .distinct().toJavaRDD().map(t -> t.getString(0)).collect();
    List<Row> broadcasted = mutations.select("pdbId", "chainId", "pdbAtomPos").collectAsList();
    SaprkUtils.stopSparkSession();

    JavaSparkContext sc = SaprkUtils.getSparkContext();
    Broadcast<List<Row>> bcmut = sc.broadcast(broadcasted);

    MmtfReader//.readSequenceFile("/pdb/2017/full", pdbIds, sc)
            .downloadMmtfFiles(Arrays.asList("5IRC"), sc)
            .flatMapToPair(new StructureToPolymerChains())
            .flatMapToPair(new AddResidueToKey(bcmut))
            .mapValues(new StructureToBioJava())
            .mapToPair(new FilterResidue())
            .filter(t -> t._2 != null).keys()
            .map(t -> t.replace(".", ","))
            .saveAsTextFile("/Users/yana/git/mutantpdb/src/main/resources/pdb_residues");
    sc.close();
}
Example 5: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    String srcBucketName;
    String scrBucketKey;
    String destBucketName;
    String destPrefix;
    ArgumentParser argumentParser = new ArgumentParser();
    AmazonS3 s3Client = new AmazonS3Client();

    try {
        BucketKey location = argumentParser.parseArguments(args);
        srcBucketName = location.getSrcBucket();
        scrBucketKey = location.getSrcKey();
        destBucketName = location.getDestBucket();
        destPrefix = location.getDestPrefix();
    } catch (ParseException e) {
        LOG.info(PARSE_ERROR_MSG);
        throw new IllegalArgumentException("Parser throw a parse Exception", e);
    }

    // Obtain the original manifest files
    InventoryManifestRetriever inventoryManifestRetriever =
            new InventoryManifestRetriever(s3Client, srcBucketName, scrBucketKey);
    InventoryManifest manifest = inventoryManifestRetriever.getInventoryManifest();

    // Check if the inventory report includes the StorageClass column
    String fileSchema = manifest.getFileSchema();
    String filterColumn = "storageClass";
    if (!StringUtils.containsIgnoreCase(fileSchema, filterColumn)) {
        throw new StorageClassNotIncludedException();
    }

    // Create Spark Context
    SparkConf sparkConf = new SparkConf();
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    Broadcast<CachedS3ClientFactory> clientFactory = sc.broadcast(new CachedS3ClientFactory());

    // Get the inventory report, split it into lines, parse each line to a POJO,
    // filter, and write a new CSV file to S3
    JavaRDD<InventoryManifest.Locator> locatorRDD = sc.parallelize(manifest.getLocators());
    List<InventoryManifest.Locator> newLocatorList = locatorRDD
            .map(new InventoryReportLineRetriever(clientFactory, manifest))
            .flatMap(new InventoryReportMapper(manifest))
            .filter(new ReducedRedundancyStorageClassFilter())
            .mapPartitions(new WriteNewInventoryReportFunc(clientFactory, srcBucketName, manifest,
                    destBucketName, destPrefix))
            .collect();

    // Generate new manifest files including the new locators, and send them back to S3
    new ManifestWriter(s3Client, destBucketName, destPrefix, srcBucketName, manifest)
            .writeManifest(newLocatorList);
    sc.close();
}
Developer: awslabs | Project: s3-inventory-usage-examples | Lines: 54 | Source: ReducedRedundancyLocatorExampleMain.java
Example 6: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) throws IOException {
    boolean argumentsValid = parseArguments(args);
    if (!argumentsValid) {
        printHelp();
        System.exit(1);
    }

    final Configuration hadoopConfig = new Configuration(true);
    final FileSystem hdfs = FileSystem.get(hadoopConfig);
    if (hdfs.exists(outputPath)) {
        System.out.printf("output path '%s' already exists in HDFS!%n", outputPath);
        System.exit(1);
    }

    System.out.printf("reading from: %s%n", inputPath);
    System.out.printf("writing to: %s%n", outputPath);
    System.out.printf("pattern: %s%n", pattern.pattern());
    System.out.printf("sample fraction: %f%n", sampleFraction);
    System.out.printf("...%n");
    System.out.printf("%n");

    SparkConf sparkConfig = new SparkConf().setAppName(String.format("Reading sample (fraction %f) from '%s'", sampleFraction, inputPath));
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConfig);

    LogfileInputFormat.setPattern(hadoopConfig, pattern);

    JavaPairRDD<Tuple2<Path, Long>, Text> rdd = sparkContext.newAPIHadoopFile(
            inputPath.toString(),
            LogfileInputFormat.class,
            LogfileInputFormat.KEY_CLASS,
            Text.class,
            hadoopConfig);

    rdd.sample(false, sampleFraction)
            .map(tuple -> String.format("%s@%016d:%n%n%s%n%n", tuple._1._1.toString(), tuple._1._2, tuple._2.toString()))
            .repartition(1)
            .saveAsTextFile(outputPath.toString());

    sparkContext.close();
}
Example 7: main
import org.apache.spark.api.java.JavaSparkContext; // import the package/class this method depends on
public static void main(String[] args) throws IOException {
    final Configuration hadoopConfig = new Configuration(true);
    hdfs = FileSystem.get(hadoopConfig);
    if (!parseArguments(args)) {
        printHelp();
        System.exit(1);
    }

    if (hdfs.exists(directory)) {
        if (!hdfs.isDirectory(directory)) {
            System.out.printf("'%s' exists in HDFS, but is not a directory!%n", directory);
            System.exit(1);
        }
        FileStatus[] fileStatus = hdfs.listStatus(directory);
        if (fileStatus.length > 0) {
            System.out.printf("'%s' exists in HDFS, but is not empty!%n", directory);
            System.exit(1);
        }
    }

    createDirectories();
    System.out.printf("Creating test data in '%s'. This may take a while...%n", directory.toString());

    Map<String, LogfileType> logfileTypeByPath = new HashMap<>();
    LogfileSummary summary = writeLogFiles(logfileTypeByPath);

    SparkConf sparkConfig = new SparkConf().setAppName("Testing LogfileInputFormat.");
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConfig);

    logfileTypeByPath.forEach((path, type) -> {
        LogfileInputFormat.setPattern(hadoopConfig, path, type.getFirstlinePattern());
    });
    LogfileInputFormat.setPattern(hadoopConfig, LogfileType.A.getFirstlinePattern());

    JavaPairRDD<Tuple2<Path, Long>, Text> rdd;
    JavaRDD<Tuple2<LocalDateTime, LogLevel>> logRecords;

    rdd = sparkContext.newAPIHadoopFile(logDir + "/*" + FILE_EXT_LOG, LogfileInputFormat.class, LogfileInputFormat.KEY_CLASS, Text.class, hadoopConfig);
    Function<Tuple2<Tuple2<Path, Long>, Text>, Tuple2<LocalDateTime, LogLevel>> mappingFunction = mappingFunction(logfileTypeByPath);

    logRecords = rdd.map(mappingFunction).cache();
    long totalCountLog = logRecords.count();
    long infoCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.INFO).count();
    long warnCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.WARN).count();
    long errorCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.ERROR).count();

    rdd = sparkContext.newAPIHadoopFile(logDirGz + "/*" + FILE_EXT_GZ, LogfileInputFormat.class, LogfileInputFormat.KEY_CLASS, Text.class, hadoopConfig);
    logRecords = rdd.map(mappingFunction).cache();
    long totalCountGz = logRecords.count();
    long infoCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.INFO).count();
    long warnCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.WARN).count();
    long errorCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.ERROR).count();

    long totalCountExpected = summary.getRecordCount();
    long infoCountExpected = summary.getRecordCount(LogLevel.INFO);
    long warnCountExpected = summary.getRecordCount(LogLevel.WARN);
    long errorCountExpected = summary.getRecordCount(LogLevel.ERROR);

    System.out.printf("%n%n%n%30s %15s %15s %15s %15s%n%n", "", "expected", "from *.log", "from *.log.gz", "test result");
    System.out.printf("%30s %15d %15d %15d %15s%n", "total # of log records",
            totalCountExpected, totalCountLog, totalCountGz,
            ((totalCountExpected == totalCountLog && totalCountLog == totalCountGz) ? "SUCCESS" : "FAILURE"));
    System.out.printf("%30s %15d %15d %15d %15s%n", "# of INFO level records",
            infoCountExpected, infoCountLog, infoCountGz,
            ((infoCountExpected == infoCountLog && infoCountLog == infoCountGz) ? "SUCCESS" : "FAILURE"));
    System.out.printf("%30s %15d %15d %15d %15s%n", "# of WARN level records",
            warnCountExpected, warnCountLog, warnCountGz,
            ((warnCountExpected == warnCountLog && warnCountLog == warnCountGz) ? "SUCCESS" : "FAILURE"));
    System.out.printf("%30s %15d %15d %15d %15s%n%n%n", "# of ERROR level records",
            errorCountExpected, errorCountLog, errorCountGz,
            ((errorCountExpected == errorCountLog && errorCountLog == errorCountGz) ? "SUCCESS" : "FAILURE"));

    sparkContext.close();
}