当前位置: 首页>>代码示例>>Java>>正文


Java Broadcast类代码示例

本文整理汇总了Java中org.apache.spark.broadcast.Broadcast的典型用法代码示例。如果您正苦于以下问题:Java Broadcast类的具体用法?Java Broadcast怎么用?Java Broadcast使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Broadcast类属于org.apache.spark.broadcast包,在下文中一共展示了Broadcast类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setPartitionHeaders

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Installs the broadcast SAM file header on every partition of the given
 * reads, so that executors writing BAM output pick up the shared header.
 *
 * @param reads  the SAM records whose partitions should carry the header
 * @param header broadcast variable holding the shared {@code SAMFileHeader}
 * @return the same reads, unchanged, with the header registered per partition
 */
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {
    return reads.mapPartitions(partition -> {
        // Side effect only: expose the shared header to the BAM output
        // format on this executor before the partition is consumed.
        BAMHeaderOutputFormat.setHeader(header.getValue());
        return partition;
    });
}
 
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:11,代码来源:HDFSWriter.java

示例2: run

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity by combining the
 * value-similarity topK results with the topN ranked in-neighbor relations
 * of both entity collections.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates, 
        JavaRDD<String> rawTriples1, 
        JavaRDD<String> rawTriples2,             
        String SEPARATOR, 
        JavaRDD<String> entityIds1, 
        JavaRDD<String> entityIds2, 
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N, 
        JavaSparkContext jsc) {

    // Rank relations of both collections and merge their in-neighbor maps
    // into one serializable HashMap (second collection ranked with flag=false).
    Map<Integer, IntArrayList> neighborsById =
            new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    neighborsById.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));

    // Ship the combined map to all executors once, then score neighbors by SUM.
    Broadcast<Map<Integer, IntArrayList>> neighborsBroadcast = jsc.broadcast(neighborsById);
    return getTopKNeighborSimsSUM(topKvalueCandidates, neighborsBroadcast, K);
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:33,代码来源:CNPARCS.java

示例3: main

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Demonstrates creating, reading, and releasing a Spark broadcast variable
 * through the Scala {@code SparkContext} API (which requires an explicit
 * {@code ClassTag} for the payload type).
 */
public static void main(String[] args) {
    // Local SparkSession; broadcasts are created via its underlying SparkContext.
    SparkSession sparkSession = SparkSession.builder()
            .master("local")
            .appName("My App")
            .config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse")
            .getOrCreate();

    Broadcast<String> broadcastVar = sparkSession.sparkContext()
            .broadcast("Hello Spark", scala.reflect.ClassTag$.MODULE$.apply(String.class));
    System.out.println(broadcastVar.getValue());

    // Release resources: unpersist drops executor copies, destroy frees the
    // broadcast entirely (it cannot be used afterwards).
    broadcastVar.unpersist();
    broadcastVar.destroy();
}
 
开发者ID:PacktPublishing,项目名称:Apache-Spark-2x-for-Java-Developers,代码行数:20,代码来源:BroadcastVariable.java

示例4: readsToWritable

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Pairs every read with a Hadoop-serializable {@code SAMRecordWritable}
 * wrapper, stamping the broadcast header onto each read first.
 *
 * @param records the reads to wrap
 * @param header  broadcast variable holding the shared SAM file header
 * @return pairs of (read, writable wrapper around that read)
 */
public static JavaPairRDD<SAMRecord, SAMRecordWritable> readsToWritable(JavaRDD<SAMRecord> records, Broadcast<SAMFileHeader> header) {
    return records.mapToPair(read -> {
        SAMFileHeader sharedHeader = header.getValue();

        // The sequence dictionary is not supplied with the header file, so it
        // is filled in here, during the alignment-to-SAM map phase.
        if (sharedHeader.getSequenceDictionary() == null) {
            sharedHeader.setSequenceDictionary(new SAMSequenceDictionary());
        }
        if (sharedHeader.getSequenceDictionary().getSequence(read.getReferenceName()) == null) {
            sharedHeader.getSequenceDictionary().addSequence(new SAMSequenceRecord(read.getReferenceName()));
        }

        read.setHeaderStrict(sharedHeader);
        SAMRecordWritable writable = new SAMRecordWritable();
        writable.set(read);
        return new Tuple2<>(read, writable);
    });
}
 
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:17,代码来源:HDFSWriter.java

示例5: run

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Produces the topK neighbor candidates per entity, scored with the SUM
 * aggregation over the value-similarity topK results and the topN ranked
 * in-neighbor relations of both entity collections.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates, 
        JavaRDD<String> rawTriples1, 
        JavaRDD<String> rawTriples2,             
        String SEPARATOR, 
        JavaRDD<String> entityIds1, 
        JavaRDD<String> entityIds2, 
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N, 
        JavaSparkContext jsc) {

    // Collect the ranked in-neighbors of the first collection, then overlay
    // those of the second collection into the same map.
    Map<Integer, IntArrayList> mergedInNeighbors =
            new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    mergedInNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));

    // One broadcast copy per executor instead of per task.
    Broadcast<Map<Integer, IntArrayList>> inNeighborsBc = jsc.broadcast(mergedInNeighbors);
    JavaPairRDD<Integer, IntArrayList> candidates =
            getTopKNeighborSimsSUM(topKvalueCandidates, inNeighborsBc, K);
    return candidates;
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:35,代码来源:CNPNeighborsUnnormalized.java

示例6: run2

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Same pipeline as {@code run}, but keeps the similarity scores: returns the
 * topK neighbor candidates per entity together with their SUM-aggregated scores.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity, with their scores
 */
public JavaPairRDD<Integer, Int2FloatLinkedOpenHashMap> run2(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates, 
        JavaRDD<String> rawTriples1, 
        JavaRDD<String> rawTriples2,             
        String SEPARATOR, 
        JavaRDD<String> entityIds1, 
        JavaRDD<String> entityIds2, 
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N, 
        JavaSparkContext jsc) {

    // Merge the ranked in-neighbor maps of both collections.
    Map<Integer, IntArrayList> allInNeighbors =
            new HashMap<>(new RelationsRank().run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    allInNeighbors.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));

    // Broadcast once and compute the scored topK neighbor candidates.
    Broadcast<Map<Integer, IntArrayList>> allInNeighborsBc = jsc.broadcast(allInNeighbors);
    return getTopKNeighborSimsSUMWithScores(topKvalueCandidates, allInNeighborsBc, K);
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:33,代码来源:CNPARCS.java

示例7: run

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Computes the topK neighbor candidates per entity from the value-similarity
 * topK results and the topN ranked in-neighbor relations of both collections.
 *
 * @param topKvalueCandidates the topK results per entity, acquired from value similarity
 * @param rawTriples1 the rdf triples of the first entity collection
 * @param rawTriples2 the rdf triples of the second entity collection
 * @param SEPARATOR the delimiter that separates subjects, predicates and objects in the rawTriples1 and rawTriples2 files
 * @param entityIds1 the mapping of entity urls to entity ids, as it was used in blocking
 * @param entityIds2 the mapping of entity urls to entity ids for the second collection
 * @param MIN_SUPPORT_THRESHOLD the minimum support threshold, below which, relations are discarded from top relations
 * @param K the K for topK candidate matches
 * @param N the N for topN rdf relations (and neighbors)
 * @param jsc the java spark context used to load files and broadcast variables
 * @return topK neighbor candidates per entity
 */
public JavaPairRDD<Integer, IntArrayList> run(JavaPairRDD<Integer,Int2FloatLinkedOpenHashMap> topKvalueCandidates, 
        JavaRDD<String> rawTriples1, 
        JavaRDD<String> rawTriples2,             
        String SEPARATOR, 
        JavaRDD<String> entityIds1, 
        JavaRDD<String> entityIds2, 
        float MIN_SUPPORT_THRESHOLD,
        int K,
        int N, 
        JavaSparkContext jsc) {

    // Build one combined in-neighbor map: first collection (flag=true),
    // then the second collection layered on top (flag=false).
    RelationsRank ranker = new RelationsRank();
    Map<Integer, IntArrayList> combined =
            new HashMap<>(ranker.run(rawTriples1, SEPARATOR, entityIds1, MIN_SUPPORT_THRESHOLD, N, true, jsc));
    combined.putAll(new RelationsRank().run(rawTriples2, SEPARATOR, entityIds2, MIN_SUPPORT_THRESHOLD, N, false, jsc));

    // Broadcast the map and rank neighbors for each entity.
    Broadcast<Map<Integer, IntArrayList>> combinedBc = jsc.broadcast(combined);
    return getTopKNeighborSims(topKvalueCandidates, combinedBc, K);
}
 
开发者ID:vefthym,项目名称:MinoanER,代码行数:36,代码来源:CNPNeighbors.java

示例8: readAndConvertFeatureRDD

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Re-keys a (String id, float[] features) RDD by the numeric index from the
 * broadcast id-to-index map, widens the features to double[], and returns a
 * persisted Scala RDD keyed by Object (the shape Spark's ALS API expects).
 *
 * @param javaRDD    feature vectors keyed by String id
 * @param bIdToIndex broadcast mapping from String id to Integer index
 * @return a persisted RDD of (index, double[] features) tuples, keyed as Object
 */
private static RDD<Tuple2<Object,double[]>> readAndConvertFeatureRDD(
    JavaPairRDD<String,float[]> javaRDD,
    Broadcast<Map<String,Integer>> bIdToIndex) {

  // Swap the String key for its numeric index, then widen float -> double.
  RDD<Tuple2<Integer,double[]>> indexedRDD = javaRDD
      .mapToPair(entry -> new Tuple2<>(bIdToIndex.value().get(entry._1()), entry._2()))
      .mapValues(features -> {
        double[] widened = new double[features.length];
        for (int i = 0; i < widened.length; i++) {
          widened[i] = features[i];
        }
        return widened;
      })
      .rdd();

  // This mimics the persistence level established by ALS training methods.
  indexedRDD.persist(StorageLevel.MEMORY_AND_DISK());

  // Safe erasure cast: Integer is an Object, and the tuples are never mutated.
  @SuppressWarnings("unchecked")
  RDD<Tuple2<Object,double[]>> objectKeyedRDD = (RDD<Tuple2<Object,double[]>>) (RDD<?>) indexedRDD;
  return objectKeyedRDD;
}
 
开发者ID:oncewang,项目名称:oryx2,代码行数:23,代码来源:ALSUpdate.java

示例9: getRdfsLabels

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Get map of rdfs:labels for specified URIs.
 *
 * @param quads Quads to use for retrieving labels
 * @param uris  Set of URI Strings to find labels for
 * @return map of URI -&gt; rdfs:label
 */
private Map<String, String> getRdfsLabels(JavaRDD<Quad> quads, Set<String> uris) {
    // Broadcast the URI set so the filter does not re-serialize it per task.
    Broadcast<Set<String>> requestedUris = sc.broadcast(uris);

    Map<String, String> labelsByUri = quads
            // keep only rdfs:label statements whose subject is a requested URI
            .filter(quad -> quad.getPredicate().isURI()
                    && quad.getPredicate().getURI().equals(LABEL_URI)
                    && quad.getSubject().isURI()
                    && requestedUris.getValue().contains(quad.getSubject().getURI()))
            // pair each subject URI with its literal label text
            .mapToPair(quad -> new Tuple2<>(
                    quad.getSubject().getURI(),
                    quad.getObject().getLiteralValue().toString()))
            .collectAsMap();

    // collectAsMap yields a non-serializable map; copy into a plain HashMap.
    return new HashMap<>(labelsByUri);
}
 
开发者ID:Merck,项目名称:rdf2x,代码行数:24,代码来源:RdfSchemaCollector.java

示例10: find

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Checks whether the given column combination is present in the distributed
 * column-combination map.
 *
 * @param columnCombination the combination to look up
 * @return true if an equal combination exists in the map, false otherwise
 */
protected boolean find(ColumnCombinationBitset columnCombination) {
    // Nothing to search in an empty map; also avoids creating a broadcast.
    if (this.columnCombinationMap.isEmpty()) {
        return false;
    }

    // Broadcast the probe so every filter task reuses one serialized copy.
    Broadcast<ColumnCombinationBitset> probe = Singleton.getSparkContext().broadcast(columnCombination);
    boolean noMatch = this.columnCombinationMap
            .filter((ColumnCombinationBitset ccb) -> ccb.equals((Object) probe.value()))
            .isEmpty();

    // The filter has been evaluated by isEmpty(); safe to free the broadcast.
    probe.destroy();
    return !noMatch;
}
 
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:20,代码来源:SimplePruningGraph.java

示例11: getNextParentColumnCombination

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Returns an unpruned direct superset of the given column combination to
 * visit next, or {@code null} if the combination is already a minimal positive.
 *
 * @param column the column combination whose direct supersets should be explored
 * @return an unpruned superset to traverse next, or {@code null} if none
 */
protected ColumnCombinationBitset getNextParentColumnCombination(ColumnCombinationBitset column) {
    
    Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
   
    //if minimal positives contain column return null
    if(!this.minimalPositives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()){
        // BUGFIX: the broadcast was leaked on this early-return path; release
        // it here, as the normal path below already does.
        bColumn.destroy();
        return null;
    }
    List<ColumnCombinationBitset> supersets = column.getDirectSupersets(this.bitmaskForNonUniqueColumns);
    JavaRDD<ColumnCombinationBitset> supersetsRdd = Singleton.getSparkContext().parallelize(supersets);
    
    //destroy broadcast variable (the filter above has already been evaluated)
    bColumn.destroy();
    
    return this.findUnprunedSet(supersetsRdd);
}
 
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:17,代码来源:SimpleGraphTraverser.java

示例12: getNextChildColumnCombination

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Returns an unpruned direct subset of the given column combination to visit
 * next, or {@code null} if the combination is a single column or a maximal negative.
 *
 * @param column the column combination whose direct subsets should be explored
 * @return an unpruned subset to traverse next, or {@code null} if none
 */
protected ColumnCombinationBitset getNextChildColumnCombination(ColumnCombinationBitset column) {
    // A single column has no proper subsets to descend into.
    if (column.size() == 1) {
        return null;
    }
    Broadcast<ColumnCombinationBitset> bColumn = Singleton.getSparkContext().broadcast(column);
    
    //if maximal negatives contain column return null
    if(!this.maximalNegatives.filter((ColumnCombinationBitset ccb) -> ccb.equals(bColumn.value())).isEmpty()){
        // BUGFIX: the broadcast was leaked on this early-return path; release
        // it here, as the normal path below already does.
        bColumn.destroy();
        return null;
    }
    
    List<ColumnCombinationBitset> subsets = column.getDirectSubsets();
    JavaRDD<ColumnCombinationBitset> subsetsRdd = Singleton.getSparkContext().parallelize(subsets);
    
    //destroy broadcast variable (the filter above has already been evaluated)
    bColumn.destroy();
    
    return this.findUnprunedSet(subsetsRdd);
}
 
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:20,代码来源:SimpleGraphTraverser.java

示例13: parseFile

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Parses the raw input lines into {@code Adult} records, using the broadcast
 * column names and each line's position in the file as the row id.
 *
 * @return an RDD of parsed Adult rows
 */
public JavaRDD<Adult> parseFile(){
    // Attach a stable zero-based index to every input line.
    JavaPairRDD<String, Long> indexedLines = this.input.zipWithIndex();

    // Broadcast the column names so all map tasks share a single copy.
    // NOTE: not destroyed here — the returned RDD is lazy and still needs it.
    Broadcast<ArrayList<Tuple2<String,Integer>>> bColNames = Singleton.getSparkContext().broadcast(this.columnNames);
    
    return indexedLines.map((Tuple2<String, Long> line) -> {
        // Split the CSV line and copy the fields into a mutable list.
        List<String> immutableFields = ImmutableList.copyOf(line._1.split(","));
        ArrayList<String> fieldValues = new ArrayList<>(immutableFields);
        return new Adult(bColNames.value(), fieldValues, line._2.intValue());
    });
}
 
开发者ID:mpoiitis,项目名称:DUCCspark,代码行数:18,代码来源:CustomParser.java

示例14: doOperation

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Imports a JavaRDD of Gaffer elements into the Accumulo store by converting
 * them to Accumulo key/value pairs and delegating to the key/value bulk-import
 * operation.
 *
 * @param operation the import operation carrying the input RDD and its options
 * @param context   the Gaffer execution context
 * @param store     the target Accumulo store
 * @throws OperationException if the outputPath or failurePath option is not set
 */
public void doOperation(final ImportJavaRDDOfElements operation, final Context context, final AccumuloStore store) throws OperationException {
    // Both paths are mandatory for the bulk import to run.
    final String outputPath = operation.getOption(OUTPUT_PATH);
    if (null == outputPath || outputPath.isEmpty()) {
        throw new OperationException("Option outputPath must be set for this option to be run against the accumulostore");
    }
    final String failurePath = operation.getOption(FAILURE_PATH);
    if (null == failurePath || failurePath.isEmpty()) {
        throw new OperationException("Option failurePath must be set for this option to be run against the accumulostore");
    }

    // Broadcast the element converter so each task reuses one instance.
    final SparkContext sparkContext = SparkContextUtil.getSparkSession(context, store.getProperties()).sparkContext();
    final Broadcast<AccumuloElementConverter> converterBroadcast =
            JavaSparkContext.fromSparkContext(sparkContext).broadcast(store.getKeyPackage().getKeyConverter());

    // Convert elements to key/value pairs and hand off to the pair-RDD importer.
    final JavaPairRDD<Key, Value> keyValues =
            operation.getInput().flatMapToPair(new ElementConverterFunction(converterBroadcast));
    final ImportKeyValueJavaPairRDDToAccumulo importOp =
            new ImportKeyValueJavaPairRDDToAccumulo.Builder()
                    .input(keyValues)
                    .failurePath(failurePath)
                    .outputPath(outputPath)
                    .build();
    store.execute(new OperationChain(importOp), context);
}
 
开发者ID:gchq,项目名称:Gaffer,代码行数:23,代码来源:ImportJavaRDDOfElementsHandler.java

示例15: sliceOperations

import org.apache.spark.broadcast.Broadcast; //导入依赖的package包/类
/**
 * Slices the requested [rl,ru] x [cl,cu] index range out of every broadcast
 * partition and merges the partial results into one block.
 *
 * @param rl    row lower bound
 * @param ru    row upper bound
 * @param cl    column lower bound
 * @param cu    column upper bound
 * @param block target block passed through to the per-partition slice call
 * @return the merged slice across all broadcast partitions
 * @throws DMLRuntimeException if slicing or merging fails
 */
public T sliceOperations(long rl, long ru, long cl, long cu, T block) 
		throws DMLRuntimeException 
{
	T result = null;

	for( Broadcast<PartitionedBlock<T>> broadcastPart : _pbc ) {
		// Slice this partition, then fold it into the accumulated result.
		T partSlice = broadcastPart.value().sliceOperations(rl, ru, cl, cu, block);
		if( result == null )
			result = partSlice;
		else
			result.merge(partSlice, false);
	}

	return result;
}
 
开发者ID:apache,项目名称:systemml,代码行数:17,代码来源:PartitionedBroadcast.java


注:本文中的org.apache.spark.broadcast.Broadcast类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。