This article collects typical usage examples of the Java method org.apache.spark.SparkContext.newAPIHadoopRDD. If you have been wondering what SparkContext.newAPIHadoopRDD does and how to use it in practice, the curated examples below may help. You can also explore further usage of the enclosing class, org.apache.spark.SparkContext.
Four code examples of SparkContext.newAPIHadoopRDD are shown below, sorted by popularity by default.
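Before diving into the examples, here is a minimal, self-contained sketch of the call pattern they all share: build a Hadoop Configuration, then ask the SparkContext for an RDD of key/value pairs. This sketch is an illustration, not taken from any of the examples below; the input path is a placeholder, and TextInputFormat/LongWritable/Text stand in for whatever InputFormat and key/value classes your data source actually uses.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import scala.Tuple2;

public class NewAPIHadoopRDDSketch {
    public static void main(String[] args) {
        SparkContext sc = new SparkContext(
                new SparkConf().setAppName("newAPIHadoopRDD-sketch").setMaster("local[*]"));
        Configuration conf = new Configuration();
        // Placeholder input path; the new-API FileInputFormat reads it from this key.
        conf.set("mapreduce.input.fileinputformat.inputdir", "file:///tmp/input.txt");
        // newAPIHadoopRDD(conf, inputFormatClass, keyClass, valueClass)
        RDD<Tuple2<LongWritable, Text>> rdd = sc.newAPIHadoopRDD(
                conf, TextInputFormat.class, LongWritable.class, Text.class);
        System.out.println("records: " + rdd.count());
        sc.stop();
    }
}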
Example 1: doOperation
import org.apache.spark.SparkContext; // import the package/class the method depends on
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
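Two details are worth noting in this example. First, newAPIHadoopRDD returns a Scala RDD of Tuple2 pairs rather than a JavaPairRDD, so the follow-up map that unwraps each pair down to its Element must pass an explicit ClassTag (ClassTagConstants.ELEMENT_CLASS_TAG), which Scala code would get implicitly. Second, the batch-scan option is enabled on the configuration before the RDD is created, since the operation is seeded with ranges.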
Example 2: getVertexRDD
import org.apache.spark.SparkContext; // import the package/class the method depends on
public RDD<Tuple2<Object, RyaTypeWritable>> getVertexRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real Accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);
    Job job = Job.getInstance(conf, sc.appName());
    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);
    GraphXInputFormat.setInputTableName(job, EntityCentricIndex.getTableName(conf));
    GraphXInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    GraphXInputFormat.setZooKeeperInstance(job, clientConfig);
    GraphXInputFormat.setScanAuthorizations(job, authorizations);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXInputFormat.class, Object.class, RyaTypeWritable.class);
}
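Note that the Job created here never runs: Job.getInstance is used only because GraphXInputFormat's static setters (setInputTableName, setConnectorInfo, setZooKeeperInstance, setScanAuthorizations) write their settings into a Job, and the populated configuration is then handed to newAPIHadoopRDD via job.getConfiguration().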
Example 3: getEdgeRDD
import org.apache.spark.SparkContext; // import the package/class the method depends on
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real Accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);
    Job job = Job.getInstance(conf, sc.appName());
    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);
    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);
    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
}
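This method mirrors getVertexRDD above: the Accumulo/Rya configuration block is identical, but the read targets the SPO table layout (resolved to a table name from the prefix and set with InputFormatBase.setInputTableName) and goes through GraphXEdgeInputFormat, producing GraphX Edge values instead of RyaTypeWritable vertices.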
Example 4: rddForSimpleFeatures
import org.apache.spark.SparkContext; // import the package/class the method depends on
public static JavaPairRDD<GeoWaveInputKey, SimpleFeature> rddForSimpleFeatures(
        SparkContext sc,
        DataStorePluginOptions storeOptions,
        DistributableQuery query,
        QueryOptions queryOptions,
        int minSplits,
        int maxSplits )
        throws IOException {
    Configuration conf = new Configuration(
            sc.hadoopConfiguration());
    GeoWaveInputFormat.setStoreOptions(
            conf,
            storeOptions);
    if (query != null) {
        GeoWaveInputFormat.setQuery(
                conf,
                query);
    }
    if (queryOptions != null) {
        GeoWaveInputFormat.setQueryOptions(
                conf,
                queryOptions);
    }
    if (minSplits > -1) {
        GeoWaveInputFormat.setMinimumSplitCount(
                conf,
                minSplits);
        GeoWaveInputFormat.setMaximumSplitCount(
                conf,
                maxSplits);
    }
    RDD<Tuple2<GeoWaveInputKey, SimpleFeature>> rdd = sc.newAPIHadoopRDD(
            conf,
            GeoWaveInputFormat.class,
            GeoWaveInputKey.class,
            SimpleFeature.class);
    JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = JavaPairRDD.fromRDD(
            rdd,
            (ClassTag) scala.reflect.ClassTag$.MODULE$.apply(GeoWaveInputKey.class),
            (ClassTag) scala.reflect.ClassTag$.MODULE$.apply(SimpleFeature.class));
    return javaRdd;
}
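A hypothetical call site for this helper (sc and storeOptions are assumed to be in scope; they are not defined in the example):

// Load every SimpleFeature from the store: null query/queryOptions skip the
// query setup, and -1 split counts leave the split bounds unset (see the
// minSplits > -1 guard above).
JavaPairRDD<GeoWaveInputKey, SimpleFeature> features =
        rddForSimpleFeatures(sc, storeOptions, null, null, -1, -1);
System.out.println("features: " + features.count());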