This article collects typical usage examples of the Java method org.apache.spark.SparkContext.newAPIHadoopRDD. If you have been wondering what SparkContext.newAPIHadoopRDD does and how to use it in practice, the curated examples below may help. You can also explore further usage of the enclosing class, org.apache.spark.SparkContext.
Four code examples of SparkContext.newAPIHadoopRDD are shown below, sorted by popularity by default.
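Before diving into the examples, here is a minimal, self-contained sketch of the call pattern they all share: build a Hadoop Configuration, then ask the SparkContext for an RDD of key/value pairs. This sketch is an illustration, not taken from any of the examples below; the input path is a placeholder, and TextInputFormat/LongWritable/Text stand in for whatever InputFormat and key/value classes your data source actually uses.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import scala.Tuple2;

public class NewAPIHadoopRDDSketch {
    public static void main(String[] args) {
        SparkContext sc = new SparkContext(
                new SparkConf().setAppName("newAPIHadoopRDD-sketch").setMaster("local[*]"));
        Configuration conf = new Configuration();
        // Placeholder input path; the new-API FileInputFormat reads it from this key.
        conf.set("mapreduce.input.fileinputformat.inputdir", "file:///tmp/input.txt");
        // newAPIHadoopRDD(conf, inputFormatClass, keyClass, valueClass)
        RDD<Tuple2<LongWritable, Text>> rdd = sc.newAPIHadoopRDD(
                conf, TextInputFormat.class, LongWritable.class, Text.class);
        System.out.println("records: " + rdd.count());
        sc.stop();
    }
}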
Example 1: doOperation
import org.apache.spark.SparkContext; // import the package/class the method depends on
private RDD<Element> doOperation(final GetRDDOfElements operation,
                                 final Context context,
                                 final AccumuloStore accumuloStore)
        throws OperationException {
    final Configuration conf = getConfiguration(operation);
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, accumuloStore.getProperties()).sparkContext();
    sparkContext.hadoopConfiguration().addResource(conf);
    // Use batch scan option when performing seeded operation
    InputConfigurator.setBatchScan(AccumuloInputFormat.class, conf, true);
    addIterators(accumuloStore, conf, context.getUser(), operation);
    addRanges(accumuloStore, conf, operation);
    final RDD<Tuple2<Element, NullWritable>> pairRDD = sparkContext.newAPIHadoopRDD(conf,
            ElementInputFormat.class,
            Element.class,
            NullWritable.class);
    return pairRDD.map(new FirstElement(), ClassTagConstants.ELEMENT_CLASS_TAG);
}
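Two details are worth noting in this example. First, newAPIHadoopRDD returns a Scala RDD of Tuple2 pairs rather than a JavaPairRDD, so the follow-up map that unwraps each pair down to its Element must pass an explicit ClassTag (ClassTagConstants.ELEMENT_CLASS_TAG), which Scala code would get implicitly. Second, the batch-scan option is enabled on the configuration before the RDD is created, since the operation is seeded with ranges.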
Example 2: getVertexRDD
import org.apache.spark.SparkContext; // import the package/class the method depends on
public RDD<Tuple2<Object, RyaTypeWritable>> getVertexRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real Accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);
    Job job = Job.getInstance(conf, sc.appName());
    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);
    GraphXInputFormat.setInputTableName(job, EntityCentricIndex.getTableName(conf));
    GraphXInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    GraphXInputFormat.setZooKeeperInstance(job, clientConfig);
    GraphXInputFormat.setScanAuthorizations(job, authorizations);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXInputFormat.class, Object.class, RyaTypeWritable.class);
}
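Note that the Job created here never runs: Job.getInstance is used only because GraphXInputFormat's static setters (setInputTableName, setConnectorInfo, setZooKeeperInstance, setScanAuthorizations) write their settings into a Job, and the populated configuration is then handed to newAPIHadoopRDD via job.getConfiguration().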
Example 3: getEdgeRDD
import org.apache.spark.SparkContext; // import the package/class the method depends on
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real Accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);
    Job job = Job.getInstance(conf, sc.appName());
    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);
    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);
    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
}
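This method mirrors getVertexRDD above: the Accumulo/Rya configuration block is identical, but the read targets the SPO table layout (resolved to a table name from the prefix and set with InputFormatBase.setInputTableName) and goes through GraphXEdgeInputFormat, producing GraphX Edge values instead of RyaTypeWritable vertices.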
Example 4: rddForSimpleFeatures
import org.apache.spark.SparkContext; // import the package/class the method depends on
public static JavaPairRDD<GeoWaveInputKey, SimpleFeature> rddForSimpleFeatures(
        SparkContext sc,
        DataStorePluginOptions storeOptions,
        DistributableQuery query,
        QueryOptions queryOptions,
        int minSplits,
        int maxSplits )
        throws IOException {
    Configuration conf = new Configuration(
            sc.hadoopConfiguration());
    GeoWaveInputFormat.setStoreOptions(
            conf,
            storeOptions);
    if (query != null) {
        GeoWaveInputFormat.setQuery(
                conf,
                query);
    }
    if (queryOptions != null) {
        GeoWaveInputFormat.setQueryOptions(
                conf,
                queryOptions);
    }
    if (minSplits > -1) {
        GeoWaveInputFormat.setMinimumSplitCount(
                conf,
                minSplits);
        GeoWaveInputFormat.setMaximumSplitCount(
                conf,
                maxSplits);
    }
    RDD<Tuple2<GeoWaveInputKey, SimpleFeature>> rdd = sc.newAPIHadoopRDD(
            conf,
            GeoWaveInputFormat.class,
            GeoWaveInputKey.class,
            SimpleFeature.class);
    JavaPairRDD<GeoWaveInputKey, SimpleFeature> javaRdd = JavaPairRDD.fromRDD(
            rdd,
            (ClassTag) scala.reflect.ClassTag$.MODULE$.apply(GeoWaveInputKey.class),
            (ClassTag) scala.reflect.ClassTag$.MODULE$.apply(SimpleFeature.class));
    return javaRdd;
}
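A hypothetical call site for this helper (sc and storeOptions are assumed to be in scope; they are not defined in the example):

// Load every SimpleFeature from the store: null query/queryOptions skip the
// query setup, and -1 split counts leave the split bounds unset (see the
// minSplits > -1 guard above).
JavaPairRDD<GeoWaveInputKey, SimpleFeature> features =
        rddForSimpleFeatures(sc, storeOptions, null, null, -1, -1);
System.out.println("features: " + features.count());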