本文整理汇总了Java中org.apache.spark.broadcast.Broadcast类的典型用法代码示例。如果您正苦于以下问题:Java Broadcast类的具体用法?Java Broadcast怎么用?Java Broadcast使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Broadcast类属于org.apache.spark.broadcast包,在下文中一共展示了Broadcast类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setPartitionHeaders
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {
return reads.mapPartitions(records -> {
//header.getValue().setTextHeader(header.getValue().getTextHeader()+"\\[email protected]\\tSN:"+records..getReferenceName());
//record.setHeader(header);
BAMHeaderOutputFormat.setHeader(header.getValue());
return records;
});
}
示例2: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
*
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
JavaRDD<String> rawTriples1,
JavaRDD<String> rawTriples2,
String SEPARATOR,
JavaRDD<String> entityIds1,
JavaRDD<String> entityIds2,
float MIN_SUPPORT_THRESHOLD,
int K,
int N,
JavaSparkContext jsc) {
Map<Integer,IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer,IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsSUM(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
示例3: main
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public static void main(String[] args) {
// SparkConf conf = new SparkConf().setMaster("local").setAppName("BroadCasting");
// JavaSparkContext jsc = new JavaSparkContext(conf);
//
// Broadcast<String> broadcastVar = jsc.broadcast("Hello Spark");
//
SparkSession sparkSession = SparkSession.builder().master("local").appName("My App")
.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();
Broadcast<String> broadcastVar= sparkSession.sparkContext().broadcast("Hello Spark", scala.reflect.ClassTag$.MODULE$.apply(String.class));
System.out.println(broadcastVar.getValue());
broadcastVar.unpersist();
// broadcastVar.unpersist(true);
broadcastVar.destroy();
}
示例4: readsToWritable
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public static JavaPairRDD<SAMRecord, SAMRecordWritable> readsToWritable(JavaRDD<SAMRecord> records, Broadcast<SAMFileHeader> header) {
return records.mapToPair(read -> {
//SEQUENCE DICTIONARY must be set here for the alignment because it's not given as header file
//Set in alignment to sam map phase
if(header.getValue().getSequenceDictionary()==null) header.getValue().setSequenceDictionary(new SAMSequenceDictionary());
if(header.getValue().getSequenceDictionary().getSequence(read.getReferenceName())==null)
header.getValue().getSequenceDictionary().addSequence(new SAMSequenceRecord(read.getReferenceName()));
//read.setHeader(read.getHeader());
read.setHeaderStrict(header.getValue());
final SAMRecordWritable samRecordWritable = new SAMRecordWritable();
samRecordWritable.set(read);
return new Tuple2<>(read, samRecordWritable);
});
}
示例5: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
*
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
JavaRDD<String> rawTriples1,
JavaRDD<String> rawTriples2,
String SEPARATOR,
JavaRDD<String> entityIds1,
JavaRDD<String> entityIds2,
float MIN_SUPPORT_THRESHOLD,
int K,
int N,
JavaSparkContext jsc) {
Map<Integer,IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer,IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
//JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSims(topKvalueCandidates, inNeighbors_BV, K);
JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsSUM(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
示例6: run2
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
*
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> run2(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
JavaRDD<String> rawTriples1,
JavaRDD<String> rawTriples2,
String SEPARATOR,
JavaRDD<String> entityIds1,
JavaRDD<String> entityIds2,
float MIN_SUPPORT_THRESHOLD,
int K,
int N,
JavaSparkContext jsc) {
Map<Integer,IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer,IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> topKneighborCandidates = getTopKNeighborSimsSUMWithScores(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
示例7: run
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
*
* @param topKvalueCandidates the topK results per entity, acquired from value similarity
* @param rawTriples1 the rdf triples of the first entity collection
* @param rawTriples2 the rdf triples of the second entity collection
* @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
* @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
* @param entityIds2
* @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
* @param K the K for topK candidate matches
* @param N the N for topN rdf relations (and neighbors)
* @param jsc the java spark context used to load files and broadcast variables
* @return topK neighbor candidates per entity
*/
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates,
JavaRDD<String> rawTriples1,
JavaRDD<String> rawTriples2,
String SEPARATOR,
JavaRDD<String> entityIds1,
JavaRDD<String> entityIds2,
float MIN_SUPPORT_THRESHOLD,
int K,
int N,
JavaSparkContext jsc) {
Map<Integer,IntArrayList> inNeighbors = new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
inNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));
Broadcast<Map<Integer,IntArrayList>> inNeighbors_BV = jsc.broadcast(inNeighbors);
//JavaPairRDD<Tuple2<Integer, Integer>, Float> neighborSims = getNeighborSims(topKvalueCandidates, inNeighbors_BV);
//JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSimsOld(neighborSims, K);
JavaPairRDD<Integer, IntArrayList> topKneighborCandidates = getTopKNeighborSims(topKvalueCandidates, inNeighbors_BV, K);
return topKneighborCandidates;
}
示例8: readAndConvertFeatureRDD
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
JavaPairRDD<String,float[]> javaRDD,
Broadcast<Map<String,Integer>> bIdToIndex) {
RDD<Tuple2<Integer,double[]>> scalaRDD = javaRDD.mapToPair(t ->
new Tuple2<>(bIdToIndex.value().get(t._1()), t._2())
).mapValues(f -> {
double[] d = new double[f.length];
for (int i = 0; i < d.length; i++) {
d[i] = f[i];
}
return d;
}
).rdd();
// This mimics the persistence level establish by ALS training methods
scalaRDD.persist(StorageLevel.MEMORY_AND_DISK());
@SuppressWarnings("unchecked")
RDD<Tuple2<Object,double[]>> objKeyRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) scalaRDD;
return objKeyRDD;
}
示例9: getRdfsLabels
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
* Get map of rdfs:labels for specified URIs
*
* @param quads Quads to use for retrieving labels
* @param uris Set of URI Strings to find labels for
* @return map of URI -> rdfs:label
*/
private Map<String, String> getRdfsLabels(JavaRDD<Quad> quads, Set<String> uris) {
Broadcast<Set<String>> broadcastURIs = sc.broadcast(uris);
Map<String, String> nonSerializableMap = quads.filter(quad ->
// filter out label predicates for specified subject URIs
quad.getPredicate().isURI() &&
quad.getPredicate().getURI().equals(LABEL_URI) &&
quad.getSubject().isURI() &&
(broadcastURIs.getValue().contains(quad.getSubject().getURI()))
// map to pair of uri, label
).mapToPair(quad -> new Tuple2<>(
quad.getSubject().getURI(),
quad.getObject().getLiteralValue().toString()
)).collectAsMap();
return new HashMap<>(nonSerializableMap);
}
示例10: find
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
protected boolean find(ColumnCombinationBitset columnCombination) {
if(this.columnCombinationMap.isEmpty()){
return false;
}
else{
Broadcast<ColumnCombinationBitset> bCcb = Singleton.getSparkContext().broadcast(columnCombination);
JavaRDD<ColumnCombinationBitset> check = this.columnCombinationMap.filter((ColumnCombinationBitset ccb) -> ccb.equals((Object) bCcb.value()));
if(check.isEmpty()){
bCcb.destroy();
return false;
}
else{
bCcb.destroy();
return true;
}
}
}
示例11: getNextParentColumnCombination
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
protected ColumnCombinationBitset getNextParentColumnCombination(ColumnCombinationBitset column) {
Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
//if minimal positives contain column return null
if(!this.minimalPositives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()){
return null;
}
List<ColumnCombinationBitset> supersets = column.getDirectSupersets(this.bitmaskForNonUniqueColumns);
JavaRDD<ColumnCombinationBitset> supersetsRdd = Singleton.getSparkContext().parallelize(supersets);
//destroy broadcast variable
bColumn.destroy();
return this.findUnprunedSet(supersetsRdd);
}
示例12: getNextChildColumnCombination
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
protected ColumnCombinationBitset getNextChildColumnCombination(ColumnCombinationBitset column) {
if (column.size() == 1) {
return null;
}
Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
//if maximal negatives contain column return null
if(!this.maximalNegatives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()){
return null;
}
List<ColumnCombinationBitset> subsets = column.getDirectSubsets();
JavaRDD<ColumnCombinationBitset> subsetsRdd = Singleton.getSparkContext().parallelize(subsets);
//destroy broadcast variable
bColumn.destroy();
return this.findUnprunedSet(subsetsRdd);
}
示例13: parseFile
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public JavaRDD<Adult> parseFile(){
JavaPairRDD<String, Long> temp = this.input.zipWithIndex();
Broadcast<ArrayList<Tuple2<String,Integer>>> bColNames = Singleton.getSparkContext().broadcast(this.columnNames);
JavaRDD<Adult> rdd_adults = temp.map((Tuple2<String, Long> tuple) -> {
String[] fields = tuple._1.split(",");
//turn array to list
List<String> temp1 = ImmutableList.copyOf(fields);
ArrayList<String> fieldsList = new ArrayList<>(temp1);
Adult adult = new Adult(bColNames.value(), fieldsList,
tuple._2.intValue());
return adult;
});
return rdd_adults;
}
示例14: doOperation
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public void doOperation(final ImportJavaRDDOfElements operation, final Context context, final AccumuloStore store) throws OperationException {
final String outputPath = operation.getOption(OUTPUT_PATH);
if (null == outputPath || outputPath.isEmpty()) {
throw new OperationException("Option outputPath must be set for this option to be run against the accumulostore");
}
final String failurePath = operation.getOption(FAILURE_PATH);
if (null == failurePath || failurePath.isEmpty()) {
throw new OperationException("Option failurePath must be set for this option to be run against the accumulostore");
}
final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, store.getProperties()).sparkContext();
final Broadcast<AccumuloElementConverter> broadcast = JavaSparkContext.fromSparkContext(sparkContext).broadcast(store.getKeyPackage().getKeyConverter());
final ElementConverterFunction func = new ElementConverterFunction(broadcast);
final JavaPairRDD<Key, Value> rdd = operation.getInput().flatMapToPair(func);
final ImportKeyValueJavaPairRDDToAccumulo op =
new ImportKeyValueJavaPairRDDToAccumulo.Builder()
.input(rdd)
.failurePath(failurePath)
.outputPath(outputPath)
.build();
store.execute(new OperationChain(op), context);
}
示例15: sliceOperations
import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
public T sliceOperations(long rl, long ru, long cl, long cu, T block)
throws DMLRuntimeException
{
T ret = null;
for( Broadcast<PartitionedBlock<T>> bc : _pbc ) {
PartitionedBlock<T> pm = bc.value();
T tmp = pm.sliceOperations(rl, ru, cl, cu, block);
if( ret != null )
ret.merge(tmp, false);
else
ret = tmp;
}
return ret;
}