This article collects typical usage examples of the Java class org.apache.spark.Partition. If you are wondering what the Partition class is for and how to use it, the selected examples below should help.
The Partition class belongs to the org.apache.spark package. Fifteen code examples are shown below, ordered by popularity.
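Before the individual examples, here is a minimal sketch (not taken from any of the projects below) of what a custom Partition implementation typically looks like. Partition is a serializable interface whose key method is index(); the class name SimplePartition and its field are illustrative only:

import org.apache.spark.Partition;

// Minimal custom partition: it only carries the index of its split.
public class SimplePartition implements Partition {
    private final int index;

    public SimplePartition(int index) {
        this.index = index;
    }

    @Override
    public int index() {
        return index;
    }
}

Several of the examples below (JdbcPartition, CheckpointableSourcePartition, SourcePartition) follow this pattern and attach extra per-split state such as key ranges or source splits.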
Example 1: checkUserPartition
import org.apache.spark.Partition; // import the required package/class
protected void checkUserPartition(JavaRDD<String> userRDD) {
    List<Partition> partitions = userRDD.partitions();
    System.out.println(partitions.size());

    // Collect the index of every partition of the RDD.
    int[] partitionIds = new int[partitions.size()];
    for (int i = 0; i < partitions.size(); i++) {
        partitionIds[i] = partitions.get(i).index();
    }

    // Fetch the contents of each partition and print them.
    List<String>[] userIPs = userRDD.collectPartitions(partitionIds);
    for (int i = 0; i < userIPs.length; i++) {
        System.out.println(i + " partition");
        System.out.println(userIPs[i].toString());
    }
}
Example 2: getPartitions
import org.apache.spark.Partition; // import the required package/class
@Override
public Partition[] getPartitions() {
    try {
        final List<? extends Source<T>> partitionedSources = microbatchSource.split(options.get());
        final Partition[] partitions = new CheckpointableSourcePartition[partitionedSources.size()];
        for (int i = 0; i < partitionedSources.size(); i++) {
            partitions[i] =
                new CheckpointableSourcePartition<>(
                    id(), i, partitionedSources.get(i), EmptyCheckpointMark.get());
        }
        return partitions;
    } catch (Exception e) {
        throw new RuntimeException("Failed to create partitions.", e);
    }
}
Example 3: initIterator
import org.apache.spark.Partition; // import the required package/class
@Override
public void initIterator(Partition dp, S config) {
    int id = config.getRddId();
    NewHadoopPartition split = (NewHadoopPartition) dp;
    TaskAttemptID attemptId = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptID(jobTrackerId, id, true, split.index(), 0);
    Configuration configuration = getHadoopConfig(config);
    TaskAttemptContext hadoopAttemptContext = DeepSparkHadoopMapReduceUtil
            .newTaskAttemptContext(configuration, attemptId);
    try {
        reader = inputFormat.createRecordReader(split.serializableHadoopSplit().value(), hadoopAttemptContext);
        reader.initialize(split.serializableHadoopSplit().value(), hadoopAttemptContext);
    } catch (IOException | InterruptedException e) {
        throw new DeepGenericException(e);
    }
}
Example 4: getPartitions
import org.apache.spark.Partition; // import the required package/class
@Override
public Partition[] getPartitions(ExtractorConfig<T> config) {
    GetPartitionsAction<T> getPartitionsAction = new GetPartitionsAction<>(config);
    channel.writeAndFlush(getPartitionsAction);

    Response response;
    boolean interrupted = false;
    for (;;) {
        try {
            response = answer.take();
            break;
        } catch (InterruptedException ignore) {
            interrupted = true;
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
    return ((GetPartitionsResponse) response).getPartitions();
}
Example 5: initIterator
import org.apache.spark.Partition; // import the required package/class
@Override
public void initIterator(Partition dp, ExtractorConfig<T> config) {
    InitIteratorAction<T> initIteratorAction = new InitIteratorAction<>(dp, config);
    channel.writeAndFlush(initIteratorAction);

    Response response;
    boolean interrupted = false;
    for (;;) {
        try {
            response = answer.take();
            break;
        } catch (InterruptedException ignore) {
            interrupted = true;
        }
    }
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}
Example 6: getPartitions
import org.apache.spark.Partition; // import the required package/class
/**
 * {@inheritDoc}
 */
@Override
public Partition[] getPartitions(S config) {
    jdbcDeepJobConfig = initConfig(config, jdbcDeepJobConfig);

    int upperBound = jdbcDeepJobConfig.getUpperBound();
    int lowerBound = jdbcDeepJobConfig.getLowerBound();
    int numPartitions = jdbcDeepJobConfig.getNumPartitions();
    int length = 1 + upperBound - lowerBound;

    // Split the [lowerBound, upperBound] key range into numPartitions contiguous sub-ranges.
    Partition[] result = new Partition[numPartitions];
    for (int i = 0; i < numPartitions; i++) {
        int start = lowerBound + ((i * length) / numPartitions);
        int end = lowerBound + (((i + 1) * length) / numPartitions) - 1;
        result[i] = new JdbcPartition(i, start, end);
    }
    return result;
}
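As a concrete illustration of the range computation above: with lowerBound = 1, upperBound = 100 and numPartitions = 4, length is 100 and the loop produces the JdbcPartition ranges [1, 25], [26, 50], [51, 75] and [76, 100], so adjacent partitions cover the key space without gaps or overlap. Example 7 below then uses these bounds (jdbcPartition.lower() and jdbcPartition.upper()) to restrict the SQL WHERE clause for each partition.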
Example 7: init
import org.apache.spark.Partition; // import the required package/class
/**
 * Initializes the reader.
 *
 * @param p the Spark partition to read.
 * @throws Exception if the JDBC driver cannot be loaded or the query fails.
 */
public void init(Partition p) throws Exception {
    Class.forName(jdbcDeepJobConfig.getDriverClass());
    conn = DriverManager.getConnection(jdbcDeepJobConfig.getConnectionUrl(),
            jdbcDeepJobConfig.getUsername(),
            jdbcDeepJobConfig.getPassword());

    Statement statement = conn.createStatement();
    SelectQuery query = jdbcDeepJobConfig.getQuery();

    JdbcPartition jdbcPartition = (JdbcPartition) p;
    if (jdbcDeepJobConfig.getNumPartitions() > 1) {
        // Restrict the query to this partition's [lower, upper] range of the partition key.
        Column partitionKey = jdbcDeepJobConfig.getPartitionKey();
        query.getWhereClause()
                .addCondition(BinaryCondition.lessThan(partitionKey, jdbcPartition.upper(), true))
                .addCondition(BinaryCondition.greaterThan(partitionKey, jdbcPartition.lower(), true));
    }
    resultSet = statement.executeQuery(query.toString());
    // Fetches the first element.
    this.hasNext = resultSet.next();
}
Example 8: coalesce
import org.apache.spark.Partition; // import the required package/class
@Override
public PartitionGroup[] coalesce(int maxPartitions, RDD<?> parent) {
    if (maxPartitions != parent.getNumPartitions()) {
        throw new IllegalArgumentException("Cannot use " + getClass().getSimpleName() +
                " with a different number of partitions to the parent RDD.");
    }
    List<Partition> partitions = Arrays.asList(parent.getPartitions());
    PartitionGroup[] groups = new PartitionGroup[partitions.size()];

    for (int i = 0; i < partitions.size(); i++) {
        Seq<String> preferredLocations = parent.getPreferredLocations(partitions.get(i));
        scala.Option<String> preferredLocation = scala.Option.apply(
                preferredLocations.isEmpty() ? null : preferredLocations.apply(0));
        PartitionGroup group = new PartitionGroup(preferredLocation);
        List<Partition> partitionsInGroup =
                partitions.subList(i, maxEndPartitionIndexes.get(i) + 1);
        group.partitions().append(JavaConversions.asScalaBuffer(partitionsInGroup));
        groups[i] = group;
    }
    return groups;
}
Example 9: compute
import org.apache.spark.Partition; // import the required package/class
@Override
public Iterator<double[]> compute(final Partition partition, final TaskContext context) {
    ProgrammingError.throwIfNull(partition, context);
    if (partition instanceof Partition2D) {
        return this.compute((Partition2D) partition, context);
    } else {
        throw new IllegalArgumentException();
    }
}
Example 10: compute
import org.apache.spark.Partition; // import the required package/class
@Override
public Iterator<MatrixStore<N>> compute(final Partition partition, final TaskContext context) {
    ProgrammingError.throwIfNull(partition, context);
    if (partition instanceof Partition2D) {
        return this.compute((Partition2D) partition, context);
    } else {
        throw new IllegalArgumentException();
    }
}
Example 11: compute
import org.apache.spark.Partition; // import the required package/class
@Override
public scala.collection.Iterator<TReturn> compute(Partition split, TaskContext context) {
    String regionEdgesFamilyPath = this.regionsPaths.get(split.index());
    log.info("Running Mizo on region #{} located at: {}", split.index(), regionEdgesFamilyPath);
    return createRegionIterator(createRegionRelationsIterator(regionEdgesFamilyPath));
}
Example 12: getPartitions
import org.apache.spark.Partition; // import the required package/class
@Override
public Partition[] getPartitions() {
    return Iterators.toArray(IntStream
            .range(0, this.regionsPaths.size())
            .mapToObj(i -> (Partition) () -> i)
            .iterator(), Partition.class);
}
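The lambda in this example compiles because Partition's single abstract method is index(); the cast (Partition) () -> i therefore produces one lightweight partition per region path whose index() simply returns i.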
Example 13: compute
import org.apache.spark.Partition; // import the required package/class
@Override
public scala.collection.Iterator<WindowedValue<T>> compute(final Partition split,
        final TaskContext context) {
    final MetricsContainer metricsContainer = metricsAccum.localValue().getContainer(stepName);

    @SuppressWarnings("unchecked")
    final BoundedSource.BoundedReader<T> reader = createReader((SourcePartition<T>) split);

    final Iterator<WindowedValue<T>> readerIterator =
            new ReaderToIteratorAdapter<>(metricsContainer, reader);

    return new InterruptibleIterator<>(context, JavaConversions.asScalaIterator(readerIterator));
}
Example 14: compute
import org.apache.spark.Partition; // import the required package/class
@Override
public scala.collection.Iterator<Map.Entry<Key, Value>> compute(final Partition split, final TaskContext context) {
    // Deserialise the Hadoop Configuration that was shipped to the executor as a byte array.
    final ByteArrayInputStream bais = new ByteArrayInputStream(serialisedConfiguration);
    final Configuration configuration = new Configuration();
    try {
        configuration.readFields(new DataInputStream(bais));
        bais.close();
    } catch (final IOException e) {
        throw new RuntimeException("IOException deserialising Configuration from byte array", e);
    }
    return new InterruptibleIterator<>(context,
            JavaConversions.asScalaIterator(new RFileReaderIterator(split, context, configuration, auths)));
}
Example 15: RFileReaderIterator
import org.apache.spark.Partition; // import the required package/class
public RFileReaderIterator(final Partition partition,
        final TaskContext taskContext,
        final Configuration configuration,
        final Set<String> auths) {
    this.partition = partition;
    this.taskContext = taskContext;
    this.configuration = configuration;
    this.auths = auths;
    try {
        init();
    } catch (final IOException e) {
        throw new RuntimeException("IOException initialising RFileReaderIterator", e);
    }
}