当前位置: 首页>>代码示例>>Java>>正文


Java JavaSparkContext.close方法代码示例

本文整理汇总了Java中org.apache.spark.api.java.JavaSparkContext.close方法的典型用法代码示例。如果您正苦于以下问题:Java JavaSparkContext.close方法的具体用法?Java JavaSparkContext.close怎么用?Java JavaSparkContext.close使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.spark.api.java.JavaSparkContext的用法示例。


在下文中一共展示了JavaSparkContext.close方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Spark word-count driver: reads a text file, counts whitespace-separated
 * words and writes the (word, count) pairs to the output directory.
 *
 * @param args args[0] = input path, args[1] = output path (deleted first,
 *             because Spark refuses to write into an existing directory)
 * @throws Exception if the Spark job fails
 */
public static void main(String[] args) throws Exception {
	System.out.println(System.getProperty("hadoop.home.dir"));
	if (args.length < 2) {
		throw new IllegalArgumentException("usage: SparkWordCount <inputPath> <outputPath>");
	}
	String inputPath = args[0];
	String outputPath = args[1];
	FileUtils.deleteQuietly(new File(outputPath));

	// try-with-resources: JavaSparkContext is AutoCloseable, so the context
	// is closed even if a stage throws (the original leaked it on failure).
	try (JavaSparkContext sc = new JavaSparkContext("local", "sparkwordcount")) {
		JavaRDD<String> rdd = sc.textFile(inputPath);

		JavaPairRDD<String, Integer> counts = rdd
				.flatMap(x -> Arrays.asList(x.split(" ")).iterator())
				.mapToPair(x -> new Tuple2<>(x, 1)) // redundant (String) cast removed
				.reduceByKey(Integer::sum);

		counts.saveAsTextFile(outputPath);
	}
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:19,代码来源:SparkWordCount.java

示例2: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Counts the empty lines of a local text file with Spark and prints the total.
 *
 * @param args unused; the input file path is hard-coded
 */
public static void main( String[] args ){
	String inputFile = "data/dummy.txt";
	SparkConf configuration = new SparkConf().setMaster("local[4]").setAppName("My App");
	long numberA;
	// try-with-resources closes the context even if the job throws
	// (the original leaked it on failure). The lambda replaces the
	// anonymous Function class with identical filter semantics.
	try (JavaSparkContext sparkContext = new JavaSparkContext(configuration)) {
		JavaRDD<String> logData = sparkContext.textFile(inputFile).cache();
		numberA = logData.filter(s -> s.length() == 0).count();
	}
	// Printed after the context is closed, matching the original ordering.
	System.out.println("Empty Lines: " + numberA);
}
 
开发者ID:PacktPublishing,项目名称:Java-Data-Science-Cookbook,代码行数:16,代码来源:ScalaTest.java

示例3: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Demonstrates reading local-filesystem files into Spark, here via a glob
 * pattern that loads every file under the test directory into one RDD,
 * then prints the resulting partition count.
 *
 * Other input variants (single file, comma-separated path list,
 * wholeTextFiles for (path, content) pairs) were shown as commented-out
 * code in the original example and have been removed as dead code.
 */
public static void main(String[] args) {
		// Windows-only: point Hadoop at the winutils binaries.
		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
		SparkConf conf = new SparkConf().setMaster("local").setAppName("Local File System Example");

		// try-with-resources closes the context even if the read fails
		// (the original leaked it on failure).
		try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
			JavaRDD<String> localFile2 = jsc.textFile("C:\\Users\\sgulati\\Documents\\Result\\test\\*");
			System.out.println(localFile2.getNumPartitions());
		}
	}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:39,代码来源:LFSExample.java

示例4: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Maps mutation positions onto PDB structures and writes the surviving
 * residue keys to a local text file.
 *
 * NOTE(review): "SaprkUtils" (sic) is the project's actual helper-class
 * name; the typo is preserved because renaming it is out of scope here.
 */
public static void main( String[] args )
{
    // Load the mutation->structure table and collect the distinct PDB ids.
    // NOTE(review): pdbIds is only consumed by the commented-out
    // readSequenceFile(...) variant below; with downloadMmtfFiles it is unused.
    Dataset<Row> mutations = DataProvider.getMutationsToStructures();
    List<String> pdbIds = mutations.select(col("pdbId"))
            .distinct().toJavaRDD().map(t -> t.getString(0)).collect();

    // Materialize the (pdbId, chainId, pdbAtomPos) rows on the driver so they
    // can be broadcast to the executors below. The SparkSession is stopped
    // first; a plain JavaSparkContext is then obtained for the MMTF pipeline.
    List<Row> broadcasted = mutations.select("pdbId", "chainId", "pdbAtomPos").collectAsList();
    SaprkUtils.stopSparkSession();

    JavaSparkContext sc = SaprkUtils.getSparkContext();
    Broadcast<List<Row>> bcmut = sc.broadcast(broadcasted);

    // Download structure 5IRC, split it into polymer chains, attach residue
    // keys from the broadcast mutation rows, convert to BioJava structures,
    // drop entries whose FilterResidue value is null, and save the remaining
    // keys (with '.' replaced by ',') as a text file.
    MmtfReader//.readSequenceFile("/pdb/2017/full", pdbIds, sc)
            .downloadMmtfFiles(Arrays.asList("5IRC"), sc)
            .flatMapToPair(new StructureToPolymerChains())
            .flatMapToPair(new AddResidueToKey(bcmut))
            .mapValues(new StructureToBioJava())
            .mapToPair(new FilterResidue())
            .filter(t -> t._2!=null).keys()
            .map(t -> t.replace(".", ","))
            .saveAsTextFile("/Users/yana/git/mutantpdb/src/main/resources/pdb_residues");
    sc.close();
}
 
开发者ID:jjgao,项目名称:mutantpdb,代码行数:24,代码来源:App.java

示例5: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Filters an S3 inventory report down to REDUCED_REDUNDANCY objects and
 * writes a new report (plus an updated manifest) back to S3.
 *
 * @param args CLI arguments parsed into source/destination bucket locations
 * @throws Exception if argument parsing, manifest retrieval or the Spark
 *                   job fails
 */
public static void main(String[] args) throws Exception{
    String srcBucketName;
    String scrBucketKey;
    String destBucketName;
    String destPrefix;
    ArgumentParser argumentParser = new ArgumentParser();
    AmazonS3 s3Client = new AmazonS3Client();

    try {
        BucketKey location = argumentParser.parseArguments(args);
        srcBucketName = location.getSrcBucket();
        scrBucketKey = location.getSrcKey();
        destBucketName = location.getDestBucket();
        destPrefix = location.getDestPrefix();
    } catch (ParseException e) {
        LOG.info(PARSE_ERROR_MSG);
        // Preserve the cause; fixed grammar of the original message
        // ("Parser throw a parse Exception").
        throw new IllegalArgumentException("Parser threw a ParseException", e);
    }

    // Obtain the original manifest file for the inventory report.
    InventoryManifestRetriever inventoryManifestRetriever =
            new InventoryManifestRetriever(s3Client, srcBucketName, scrBucketKey);
    InventoryManifest manifest = inventoryManifestRetriever.getInventoryManifest();

    // The filter below needs the StorageClass column; fail fast if absent.
    String fileSchema = manifest.getFileSchema();
    String filterColumn = "storageClass";
    if (!StringUtils.containsIgnoreCase(fileSchema, filterColumn)) {
        throw new StorageClassNotIncludedException();
    }

    // try-with-resources closes the Spark context even if the job throws
    // (the original leaked it on failure).
    SparkConf sparkConf = new SparkConf();
    try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
        Broadcast<CachedS3ClientFactory> clientFactory = sc.broadcast(new CachedS3ClientFactory());

        // Fetch the inventory report, split it into lines, parse each line to
        // a POJO, keep only ReducedRedundancy rows, and write new csv files to S3.
        JavaRDD<InventoryManifest.Locator> locatorRDD = sc.parallelize(manifest.getLocators());
        List<InventoryManifest.Locator> newLocatorList = locatorRDD
                .map(new InventoryReportLineRetriever(clientFactory, manifest))
                .flatMap(new InventoryReportMapper(manifest))
                .filter(new ReducedRedundancyStorageClassFilter())
                .mapPartitions(new WriteNewInventoryReportFunc(clientFactory, srcBucketName, manifest,
                        destBucketName, destPrefix))
                .collect();

        // Generate a manifest covering the new locators and send it back to S3.
        new ManifestWriter(s3Client, destBucketName, destPrefix, srcBucketName, manifest)
                .writeManifest(newLocatorList);
    }
}
 
开发者ID:awslabs,项目名称:s3-inventory-usage-examples,代码行数:54,代码来源:ReducedRedundancyLocatorExampleMain.java

示例6: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * Reads a random sample of multi-line log records from HDFS via
 * LogfileInputFormat and writes them, formatted, to a single output file.
 *
 * Relies on fields set elsewhere in this class by parseArguments:
 * inputPath, outputPath, pattern, sampleFraction.
 *
 * @param args CLI arguments consumed by parseArguments
 * @throws IOException if HDFS access fails
 */
public static void main(String[] args) throws IOException {
    boolean argumentsValid = parseArguments(args);

    if (!argumentsValid) {
        printHelp();
        System.exit(1);
    }

    final Configuration hadoopConfig = new Configuration(true);
    final FileSystem hdfs = FileSystem.get(hadoopConfig);

    // Refuse to clobber an existing output directory.
    if (hdfs.exists(outputPath)) {
        System.out.printf("output path '%s' already exists in HDFS!%n", outputPath);
        System.exit(1);
    }

    System.out.printf("reading from:     %s%n", inputPath);
    System.out.printf("writing to:       %s%n", outputPath);
    System.out.printf("pattern:          %s%n", pattern.pattern());
    System.out.printf("sample fraction:  %f%n", sampleFraction);
    System.out.printf("...%n");
    System.out.printf("%n");

    SparkConf sparkConfig = new SparkConf().setAppName(String.format("Reading sample (fraction %f) from '%s'", sampleFraction, inputPath));
    // try-with-resources closes the Spark context even if the job throws
    // (the original leaked it on failure).
    try (JavaSparkContext sparkContext = new JavaSparkContext(sparkConfig)) {
        // The first-line pattern tells LogfileInputFormat where records start.
        LogfileInputFormat.setPattern(hadoopConfig, pattern);

        // Keys are (file path, byte offset) of each record; values are the record text.
        JavaPairRDD<Tuple2<Path, Long>, Text> rdd = sparkContext.newAPIHadoopFile(
                inputPath.toString(),
                LogfileInputFormat.class,
                LogfileInputFormat.KEY_CLASS,
                Text.class,
                hadoopConfig);

        // Sample without replacement, render "path@offset:\n\nrecord", and
        // repartition(1) so the sample lands in a single output file.
        rdd.sample(false, sampleFraction)
                .map(tuple -> String.format("%[email protected]%016d:%n%n%s%n%n", tuple._1._1.toString(), tuple._1._2, tuple._2.toString()))
                .repartition(1)
                .saveAsTextFile(outputPath.toString());
    }
}
 
开发者ID:comdirect,项目名称:hadoop-logfile-inputformat,代码行数:43,代码来源:Sample.java

示例7: main

import org.apache.spark.api.java.JavaSparkContext; //导入方法依赖的package包/类
/**
 * End-to-end self-test for LogfileInputFormat: generates log files (plain
 * and gzipped), reads them back through the input format with Spark, and
 * compares record counts per log level against the generator's summary,
 * printing SUCCESS/FAILURE per metric.
 *
 * Relies on fields and helpers declared elsewhere in this class:
 * hdfs, directory, logDir, logDirGz, FILE_EXT_LOG, FILE_EXT_GZ,
 * parseArguments, printHelp, createDirectories, writeLogFiles,
 * mappingFunction.
 *
 * @param args CLI arguments consumed by parseArguments
 * @throws IOException if HDFS access fails
 */
public static void main(String[] args) throws IOException {

        final Configuration hadoopConfig = new Configuration(true);
        hdfs = FileSystem.get(hadoopConfig);

        if (!parseArguments(args)) {
            printHelp();
            System.exit(1);
        }

        // The target directory must either not exist or be an empty directory.
        if (hdfs.exists(directory)) {
            if (!hdfs.isDirectory(directory)) {
                System.out.printf("'%s' exists in HDFS, but is not a directory!%n", directory);
                System.exit(1);
            }
            FileStatus[] fileStatus = hdfs.listStatus(directory);
            if (fileStatus.length > 0) {
                System.out.printf("'%s' exists in HDFS, but is not empty!%n", directory);
                System.exit(1);
            }
        }

        createDirectories();

        System.out.printf("Creating test data in '%s'. This may take a while...%n", directory.toString());

        // Maps each generated file path to its log type (per-path first-line pattern).
        Map<String, LogfileType> logfileTypeByPath = new HashMap<>();

        // Writes the test log files and returns the expected record counts.
        LogfileSummary summary = writeLogFiles(logfileTypeByPath);

        SparkConf sparkConfig = new SparkConf().setAppName("Testing LogfileInputFormat.");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConfig);

        // Register a per-path pattern for each file, plus type A's pattern as
        // the fallback for paths without a specific registration.
        logfileTypeByPath.forEach((path, type) -> {
            LogfileInputFormat.setPattern(hadoopConfig, path, type.getFirstlinePattern());
        });
        LogfileInputFormat.setPattern(hadoopConfig, LogfileType.A.getFirstlinePattern());

        JavaPairRDD<Tuple2<Path, Long>, Text> rdd;
        JavaRDD<Tuple2<LocalDateTime, LogLevel>> logRecords;

        // Pass 1: read the uncompressed *.log files and count records per level.
        rdd = sparkContext.newAPIHadoopFile(logDir + "/*" + FILE_EXT_LOG, LogfileInputFormat.class, LogfileInputFormat.KEY_CLASS, Text.class, hadoopConfig);

        Function<Tuple2<Tuple2<Path, Long>, Text>, Tuple2<LocalDateTime, LogLevel>> mappingFunction = mappingFunction(logfileTypeByPath);

        // cache(): the RDD is counted four times below.
        logRecords = rdd.map(mappingFunction).cache();
        long totalCountLog = logRecords.count();
        long infoCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.INFO).count();
        long warnCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.WARN).count();
        long errorCountLog = logRecords.filter(tuple -> tuple._2 == LogLevel.ERROR).count();

        // Pass 2: same counts from the gzip-compressed *.log.gz copies.
        rdd = sparkContext.newAPIHadoopFile(logDirGz + "/*" + FILE_EXT_GZ, LogfileInputFormat.class, LogfileInputFormat.KEY_CLASS, Text.class, hadoopConfig);

        logRecords = rdd.map(mappingFunction).cache();
        long totalCountGz = logRecords.count();
        long infoCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.INFO).count();
        long warnCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.WARN).count();
        long errorCountGz = logRecords.filter(tuple -> tuple._2 == LogLevel.ERROR).count();

        // Expected counts recorded while the files were generated.
        long totalCountExpected = summary.getRecordCount();
        long infoCountExpected = summary.getRecordCount(LogLevel.INFO);
        long warnCountExpected = summary.getRecordCount(LogLevel.WARN);
        long errorCountExpected = summary.getRecordCount(LogLevel.ERROR);

        // A metric passes only if expected == *.log count == *.log.gz count.
        System.out.printf("%n%n%n%30s %15s %15s %15s %15s%n%n", "", "expected", "from *.log", "from *.log.gz", "test result");
        System.out.printf("%30s %15d %15d %15d %15s%n", "total # of log records",
                totalCountExpected, totalCountLog, totalCountGz,
                ((totalCountExpected == totalCountLog && totalCountLog == totalCountGz) ? "SUCCESS" : "FAILURE"));
        System.out.printf("%30s %15d %15d %15d %15s%n", "# of INFO level records",
                infoCountExpected, infoCountLog, infoCountGz,
                ((infoCountExpected == infoCountLog && infoCountLog == infoCountGz) ? "SUCCESS" : "FAILURE"));
        System.out.printf("%30s %15d %15d %15d %15s%n", "# of WARN level records",
                warnCountExpected, warnCountLog, warnCountGz,
                ((warnCountExpected == warnCountLog && warnCountLog == warnCountGz) ? "SUCCESS" : "FAILURE"));
        System.out.printf("%30s %15d %15d %15d %15s%n%n%n", "# of ERROR level records",
                errorCountExpected, errorCountLog, errorCountGz,
                ((errorCountExpected == errorCountLog && errorCountLog == errorCountGz) ? "SUCCESS" : "FAILURE"));

        sparkContext.close();
    }
 
开发者ID:comdirect,项目名称:hadoop-logfile-inputformat,代码行数:81,代码来源:Test.java


注:本文中的org.apache.spark.api.java.JavaSparkContext.close方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。