本文整理汇总了Python中pysparkling.Context.parallelize方法的典型用法代码示例。如果您正苦于以下问题：Python Context.parallelize方法的具体用法？Python Context.parallelize怎么用？Python Context.parallelize使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysparkling.Context的用法示例。
在下文中一共展示了Context.parallelize方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_first_mp
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
def test_first_mp():
    """Check ``first()`` on an RDD whose Context runs on a process pool.

    The Context is built with ``cloudpickle.dumps`` as serializer and
    ``pickle.loads`` as deserializer. The eight-element list is handed to
    ``parallelize`` together with ``3`` (the partition count in
    pysparkling's API), and the first element is expected to be ``1``.
    """
    p = multiprocessing.Pool(4)
    try:
        c = Context(pool=p, serializer=cloudpickle.dumps,
                    deserializer=pickle.loads)
        my_rdd = c.parallelize([1, 2, 2, 4, 1, 3, 5, 9], 3)
        print(my_rdd.first())
        assert my_rdd.first() == 1
    finally:
        # Release the four worker processes even when the assertion fails;
        # without this every run leaves the pool's children behind.
        p.close()
        p.join()
示例2: test_multiprocessing
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
def test_multiprocessing():
    """Square the elements of a small RDD through a process pool.

    The Context uses ``dill`` for both serialization and deserialization.
    The test maps ``x * x`` over ``[1, 3, 4]`` and checks that ``16`` is
    among the collected results.
    """
    p = multiprocessing.Pool(4)
    try:
        c = Context(pool=p, serializer=dill.dumps, deserializer=dill.loads)
        my_rdd = c.parallelize([1, 3, 4])
        r = my_rdd.map(lambda x: x*x).collect()
        print(r)
        assert 16 in r
    finally:
        # Release the four worker processes even when the assertion fails;
        # without this every run leaves the pool's children behind.
        p.close()
        p.join()
示例3: RDDTest
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
class RDDTest(unittest.TestCase):
    """Outer-join tests for key-value RDDs built with ``Context.parallelize``."""

    def setUp(self):
        """Create a default Context before each test."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Left outer join where no key is repeated within one RDD."""
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        x_with_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_with_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())
        z_with_x = sorted(rdd_z.leftOuterJoin(rdd_x).collect())

        # Left keys without a partner on the right are paired with None.
        self.assertEqual(x_with_y, [
            ('a', ('xa', None)),
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ])
        self.assertEqual(x_with_z, [
            ('a', ('xa', None)),
            ('b', ('xb', None)),
            ('c', ('xc', 'zc')),
        ])
        self.assertEqual(z_with_x, [
            ('c', ('zc', 'xc')),
            ('d', ('zd', None)),
        ])

    def testLeftOuterJoinDuplicate(self):
        """Left outer join where the key ``'c'`` occurs more than once."""
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_with_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_with_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())

        self.assertEqual(x_with_y, [
            ('a', ('xa', None)),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ])
        # Duplicates on both sides expand to every left/right combination.
        self.assertEqual(x_with_z, [
            ('a', ('xa', None)),
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
        ])

    def testRightOuterJoinSimple(self):
        """Right outer join where no key is repeated within one RDD."""
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        x_with_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_with_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())
        z_with_x = sorted(rdd_z.rightOuterJoin(rdd_x).collect())

        # Right keys without a partner on the left are paired with None.
        self.assertEqual(x_with_y, [
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ])
        self.assertEqual(x_with_z, [
            ('c', ('xc', 'zc')),
            ('d', (None, 'zd')),
        ])
        self.assertEqual(z_with_x, [
            ('a', (None, 'xa')),
            ('b', (None, 'xb')),
            ('c', ('zc', 'xc')),
        ])

    def testRightOuterJoinDuplicate(self):
        """Right outer join where the key ``'c'`` occurs more than once."""
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_with_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_with_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())

        self.assertEqual(x_with_y, [
            ('b', (None, 'yb')),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ])
        # Duplicates on both sides expand to every left/right combination.
        self.assertEqual(x_with_z, [
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
            ('d', (None, 'zd')),
        ])
def testFullOuterJoinSimple(self):
"""Test the basic full outer join with simple key-value pairs"""
x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])
xy = sorted(x.fullOuterJoin(y).collect())
xz = sorted(x.fullOuterJoin(z).collect())
zx = sorted(z.fullOuterJoin(x).collect())
self.assertEqual(xy, [('a', ('xa', None)),
('b', ('xb', 'yb')),
('c', ('xc', 'yc'))])
# ......... part of the code is omitted here .........
示例4: RDDTest
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
class RDDTest(unittest.TestCase):
    """Exercise left and right outer joins on small key-value RDDs."""

    def setUp(self):
        """Create a default Context before each test."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Left outer join; every key is unique within its RDD."""
        ctx = self.context
        x = ctx.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = ctx.parallelize([('b', 'yb'), ('c', 'yc')])
        z = ctx.parallelize([('c', 'zc'), ('d', 'zd')])

        joined_xy = sorted(x.leftOuterJoin(y).collect())
        joined_xz = sorted(x.leftOuterJoin(z).collect())
        joined_zx = sorted(z.leftOuterJoin(x).collect())

        # An unmatched left key keeps its value and gets None on the right.
        expected_xy = [('a', ('xa', None)), ('b', ('xb', 'yb')), ('c', ('xc', 'yc'))]
        self.assertEqual(joined_xy, expected_xy)

        expected_xz = [('a', ('xa', None)), ('b', ('xb', None)), ('c', ('xc', 'zc'))]
        self.assertEqual(joined_xz, expected_xz)

        expected_zx = [('c', ('zc', 'xc')), ('d', ('zd', None))]
        self.assertEqual(joined_zx, expected_zx)

    @unittest.skip("Known failure")
    def testLeftOuterJoinDuplicate(self):
        """Left outer join with a repeated key (currently skipped)."""
        ctx = self.context
        x = ctx.parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        y = ctx.parallelize([('b', 'yb'), ('c', 'yc')])
        z = ctx.parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        joined_xy = sorted(x.leftOuterJoin(y).collect())
        joined_xz = sorted(x.leftOuterJoin(z).collect())

        expected_xy = [('a', ('xa', None)), ('c', ('xc1', 'yc')), ('c', ('xc2', 'yc'))]
        self.assertEqual(joined_xy, expected_xy)

        # A key duplicated on both sides yields the cartesian product.
        expected_xz = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
        ]
        self.assertEqual(joined_xz, expected_xz)

    def testRightOuterJoinSimple(self):
        """Right outer join; every key is unique within its RDD."""
        ctx = self.context
        x = ctx.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = ctx.parallelize([('b', 'yb'), ('c', 'yc')])
        z = ctx.parallelize([('c', 'zc'), ('d', 'zd')])

        joined_xy = sorted(x.rightOuterJoin(y).collect())
        joined_xz = sorted(x.rightOuterJoin(z).collect())
        joined_zx = sorted(z.rightOuterJoin(x).collect())

        # An unmatched right key keeps its value and gets None on the left.
        expected_xy = [('b', ('xb', 'yb')), ('c', ('xc', 'yc'))]
        self.assertEqual(joined_xy, expected_xy)

        expected_xz = [('c', ('xc', 'zc')), ('d', (None, 'zd'))]
        self.assertEqual(joined_xz, expected_xz)

        expected_zx = [('a', (None, 'xa')), ('b', (None, 'xb')), ('c', ('zc', 'xc'))]
        self.assertEqual(joined_zx, expected_zx)

    @unittest.skip("Known failure")
    def testRightOuterJoinDuplicate(self):
        """Right outer join with a repeated key (currently skipped)."""
        ctx = self.context
        x = ctx.parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        y = ctx.parallelize([('b', 'yb'), ('c', 'yc')])
        z = ctx.parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        joined_xy = sorted(x.rightOuterJoin(y).collect())
        joined_xz = sorted(x.rightOuterJoin(z).collect())

        expected_xy = [('b', (None, 'yb')), ('c', ('xc1', 'yc')), ('c', ('xc2', 'yc'))]
        self.assertEqual(joined_xy, expected_xy)

        # A key duplicated on both sides yields the cartesian product.
        expected_xz = [
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
            ('d', (None, 'zd')),
        ]
        self.assertEqual(joined_xz, expected_xz)
def testFullOuterJoinSimple(self):
""" Test the basic full outer join with simple key-value pairs """
x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])
xy = sorted(x.fullOuterJoin(y).collect())
xz = sorted(x.fullOuterJoin(z).collect())
zx = sorted(z.fullOuterJoin(x).collect())
self.assertEqual(xy, [('a', ('xa', None)),
# ......... part of the code is omitted here .........
示例5: print
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
currentUser = i.username
if(count ==5):
break
print('#### quantidade de lang=pt por hashtag #####')


def inserirByTag(x):
    """Print one aggregated pair and insert it into ``resumebytag``.

    ``x[0]`` is written to the ``hashtag`` column and ``x[1]`` to ``count``.
    """
    print(x)
    # NOTE(review): the row id is drawn from only 20000 possible values, so
    # ids can repeat across inserts -- confirm this is acceptable.
    row_id = random.randrange(10000, 30000)
    session.execute(
        "insert into resumebytag (uuid, hashtag, count) values (%s, %s, %s)",
        (row_id, x[0], x[1]),
    )


sc = Context()
# Keep the records whose field 3 equals 'pt', then emit (field 2, 1) pairs.
testeRDD = filter(lambda rec: rec[3] == 'pt', mostFollowersRDD)
teste2RDD = map(lambda rec: (rec[2], 1), testeRDD)
teste3RDD = sc.parallelize(teste2RDD)
# Sum the 1s per key and store every resulting pair.
teste4RDD = teste3RDD.reduceByKey(lambda total, inc: total + inc)
teste4RDD.foreach(inserirByTag)
print('#### total de postagens/hora do dia #####')


def inserirByDayHour(x):
    """Print one aggregated pair and insert it into ``resumebydayhour``.

    ``x[0]`` is written to the ``dayhour`` column and ``x[1]`` to ``count``.
    """
    print(x)
    # NOTE(review): the row id is drawn from only 20000 possible values, so
    # ids can repeat across inserts -- confirm this is acceptable.
    row_id = random.randrange(10000, 30000)
    session.execute(
        "insert into resumebydayhour (uuid, dayhour, count) values (%s, %s, %s)",
        (row_id, x[0], x[1]),
    )


teste5RDD = sc.parallelize(mostFollowersRDD)
def agruparDate(x):
diaHora = '{:%Y-%m-%d %H}'.format(x[1])
示例6: test_union
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
def test_union():
    """Union two single-element RDDs and check that both elements survive."""
    sc = Context()
    parts = [sc.parallelize(["Hello"]), sc.parallelize(["World"])]
    merged = sc.union(parts).collect()
    # Exactly the two input strings are expected, in any order.
    assert len(merged) == 2
    assert "Hello" in merged
    assert "World" in merged
示例7: RDDTest
# Required import: from pysparkling import Context
# parallelize is a method of Context; call it on a Context instance (it cannot be imported on its own).
class RDDTest(unittest.TestCase):
    """Join tests for key-value RDDs created from in-memory lists."""

    def setUp(self):
        """Create a default Context before each test."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Left outer join on RDDs without repeated keys."""
        sc = self.context
        x = sc.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = sc.parallelize([('b', 'yb'), ('c', 'yc')])
        z = sc.parallelize([('c', 'zc'), ('d', 'zd')])
        xy = sorted(x.leftOuterJoin(y).collect())
        xz = sorted(x.leftOuterJoin(z).collect())
        zx = sorted(z.leftOuterJoin(x).collect())

        # (actual, expected) pairs, checked in the order listed.
        checks = [
            (xy, [('a', ('xa', None)), ('b', ('xb', 'yb')), ('c', ('xc', 'yc'))]),
            (xz, [('a', ('xa', None)), ('b', ('xb', None)), ('c', ('xc', 'zc'))]),
            (zx, [('c', ('zc', 'xc')), ('d', ('zd', None))]),
        ]
        for actual, expected in checks:
            self.assertEqual(actual, expected)

    def testLeftOuterJoinDuplicate(self):
        """Left outer join on RDDs in which key ``'c'`` is repeated."""
        sc = self.context
        x = sc.parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        y = sc.parallelize([('b', 'yb'), ('c', 'yc')])
        z = sc.parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])
        xy = sorted(x.leftOuterJoin(y).collect())
        xz = sorted(x.leftOuterJoin(z).collect())

        checks = [
            (xy, [('a', ('xa', None)), ('c', ('xc1', 'yc')), ('c', ('xc2', 'yc'))]),
            # Two sets of duplicate keys give the cartesian product.
            (xz, [
                ('a', ('xa', None)),
                ('c', ('xc1', 'zc1')),
                ('c', ('xc1', 'zc2')),
                ('c', ('xc2', 'zc1')),
                ('c', ('xc2', 'zc2')),
            ]),
        ]
        for actual, expected in checks:
            self.assertEqual(actual, expected)

    def testRightOuterJoinSimple(self):
        """Right outer join on RDDs without repeated keys."""
        sc = self.context
        x = sc.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = sc.parallelize([('b', 'yb'), ('c', 'yc')])
        z = sc.parallelize([('c', 'zc'), ('d', 'zd')])
        xy = sorted(x.rightOuterJoin(y).collect())
        xz = sorted(x.rightOuterJoin(z).collect())
        zx = sorted(z.rightOuterJoin(x).collect())

        # (actual, expected) pairs, checked in the order listed.
        checks = [
            (xy, [('b', ('xb', 'yb')), ('c', ('xc', 'yc'))]),
            (xz, [('c', ('xc', 'zc')), ('d', (None, 'zd'))]),
            (zx, [('a', (None, 'xa')), ('b', (None, 'xb')), ('c', ('zc', 'xc'))]),
        ]
        for actual, expected in checks:
            self.assertEqual(actual, expected)

    def testRightOuterJoinDuplicate(self):
        """Right outer join on RDDs in which key ``'c'`` is repeated."""
        sc = self.context
        x = sc.parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        y = sc.parallelize([('b', 'yb'), ('c', 'yc')])
        z = sc.parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])
        xy = sorted(x.rightOuterJoin(y).collect())
        xz = sorted(x.rightOuterJoin(z).collect())

        checks = [
            (xy, [('b', (None, 'yb')), ('c', ('xc1', 'yc')), ('c', ('xc2', 'yc'))]),
            # Two sets of duplicate keys give the cartesian product.
            (xz, [
                ('c', ('xc1', 'zc1')),
                ('c', ('xc1', 'zc2')),
                ('c', ('xc2', 'zc1')),
                ('c', ('xc2', 'zc2')),
                ('d', (None, 'zd')),
            ]),
        ]
        for actual, expected in checks:
            self.assertEqual(actual, expected)
def testFullOuterJoinSimple(self):
""" Test the basic full outer join with simple key-value pairs """
x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])
xy = sorted(x.fullOuterJoin(y).collect())
xz = sorted(x.fullOuterJoin(z).collect())
zx = sorted(z.fullOuterJoin(x).collect())
self.assertEqual(xy, [('a', ('xa', None)),
('b', ('xb', 'yb')),
('c', ('xc', 'yc'))])
# ......... part of the code is omitted here .........