This article collects typical usage examples of the Python method pyspark.SparkContext.range. If you have been wondering what SparkContext.range does, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also read more about the containing class, pyspark.SparkContext.
Three code examples of SparkContext.range are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
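Before the full examples, here is a minimal sketch of the method itself. SparkContext.range(start, end=None, step=1, numSlices=None) builds an RDD of integers, analogous to Python's built-in range; the master URL, app name, and numbers below are illustrative only.
from pyspark import SparkContext

sc = SparkContext("local[2]", "range_demo")   # illustrative master/app name
rdd = sc.range(0, 10, step=2, numSlices=2)    # RDD containing [0, 2, 4, 6, 8]
print(rdd.collect())
sc.stop()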
Example 1: SearchTiles_and_Factorize
# Required import: from pyspark import SparkContext [as alias]
# Alternatively: from pyspark.SparkContext import range [as alias]
def SearchTiles_and_Factorize(n):
    # assumes module-level definitions: import json, the persisted_tiles flag,
    # the tilesearch/tilesearch_nonpersistent helpers, the *_ray_shooting_queries
    # functions, and the VectorAccumulatorParam/FactorsAccumulatorParam classes
    global globalmergedtiles
    global globalcoordinates
    global factors_accum
    global spcon
    spcon = SparkContext("local[4]", "Spark_TileSearch_Optimized")
    if persisted_tiles:
        tileintervalsf = open("/home/shrinivaasanka/Krishna_iResearch_OpenSource/GitHub/asfer-github-code/cpp-src/miscellaneous/DiscreteHyperbolicFactorizationUpperbound_TileSearch_Optimized.tileintervals", "r")
        tileintervalslist = tileintervalsf.read().split("\n")
        # print("tileintervalslist=", tileintervalslist)
        tileintervalslist_accum = spcon.accumulator(tileintervalslist, VectorAccumulatorParam())
        paralleltileintervals = spcon.parallelize(tileintervalslist)
        paralleltileintervals.foreach(tilesearch)
    else:
        factorsfile = open("DiscreteHyperbolicFactorizationUpperbound_TileSearch_Optimized.factors", "w")
        hardy_ramanujan_ray_shooting_queries(n)
        hardy_ramanujan_prime_number_theorem_ray_shooting_queries(n)
        baker_harman_pintz_ray_shooting_queries(n)
        cramer_ray_shooting_queries(n)
        zhang_ray_shooting_queries(n)
        factors_accum = spcon.accumulator(factors_of_n, FactorsAccumulatorParam())
        # spcon.parallelize(xrange(1, n)).foreach(tilesearch_nonpersistent)
        # sc.range() already returns an RDD, so no collect()/parallelize() round trip is needed
        spcon.range(1, n).foreach(tilesearch_nonpersistent)
        print("factors_accum.value = ", factors_accum.value)
        factors = []
        factordict = {}
        for f in factors_accum.value:
            factors += f
        factordict[n] = factors
        json.dump(factordict, factorsfile)
        return factors
Author: shrinivaasanka, Project: asfer-github-code, Lines: 36, Source file: DiscreteHyperbolicFactorizationUpperbound_TileSearch_Optimized.py
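Example 1 collects results from the workers through a custom accumulator (FactorsAccumulatorParam), whose definition is not part of the listing. Below is a minimal sketch of what such a class could look like, assuming the accumulator value is a list that is merged across tasks; the merge semantics are an assumption, not the project's actual code.
from pyspark.accumulators import AccumulatorParam

class FactorsAccumulatorParam(AccumulatorParam):
    # zero() supplies the initial value for each task's local accumulator
    def zero(self, initial_value):
        return []

    # addInPlace() merges two partial results (assumed list semantics)
    def addInPlace(self, v1, v2):
        v1.extend(v2)
        return v1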
Example 2: SparkContext
# Required import: from pyspark import SparkContext [as alias]
# Alternatively: from pyspark.SparkContext import range [as alias]
from __future__ import print_function

from pyspark import SparkContext

if __name__ == "__main__":
    sc = SparkContext(appName="PythonStreamingNetworkWordCount")
    # sc.range() builds an RDD of the integers [1, 1000)
    rdd = sc.range(1, 1000)
    counts = rdd.map(lambda i: i * 2)
    counts.saveAsTextFile("s3://uryyyyyyy-sandbox/py.log")
    sc.stop()
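Note that saveAsTextFile writes a directory containing one part-file per partition, not a single file, and writing to s3:// additionally requires the Hadoop S3 connector and credentials to be configured on the cluster. A quick local check, with an illustrative output path:
out = sc.range(1, 1000).map(lambda i: i * 2)
# creates /tmp/py.log/part-00000, part-00001, ... plus a _SUCCESS marker
out.saveAsTextFile("/tmp/py.log")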
Example 3: SparkContext
# Required import: from pyspark import SparkContext [as alias]
# Alternatively: from pyspark.SparkContext import range [as alias]
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import StructType, StructField, StringType, LongType

# set up the Spark and SQL contexts
sc = SparkContext("local", "data_processor")
sqlC = SQLContext(sc)

# create dummy data frames: (string key, integer value) pairs
rdd1 = sc.range(0, 10000000).map(lambda x: ("key " + str(x), x)).repartition(100)
rdd2 = sc.range(0, 10000).map(lambda x: ("key " + str(x), x)).repartition(10)

# define schemas; Packsize holds integers, so LongType is used
# (the original StringType would fail schema verification in modern Spark)
schema = StructType([
    StructField("Id", StringType(), True),
    StructField("Packsize", LongType(), True)
])
schema2 = StructType([
    StructField("Id2", StringType(), True),
    StructField("Packsize", LongType(), True)
])

df1 = sqlC.createDataFrame(rdd1, schema)
df2 = sqlC.createDataFrame(rdd2, schema2)
print(df1.rdd.getNumPartitions())
print(df2.rdd.getNumPartitions())
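The two frames share a key column ("Id" / "Id2"), so a natural continuation, not shown in the original listing, would be joining them; a minimal sketch:
# hypothetical next step: join the dummy frames on their key columns
joined = df1.join(df2, df1.Id == df2.Id2)
print(joined.count())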