当前位置: 首页>>代码示例>>Python>>正文


Python Context.parallelize方法代码示例

本文整理汇总了Python中pysparkling.Context.parallelize方法的典型用法代码示例。如果您正苦于以下问题:Python Context.parallelize方法的具体用法?Python Context.parallelize怎么用?Python Context.parallelize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysparkling.Context的用法示例。


在下文中一共展示了Context.parallelize方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_first_mp

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
def test_first_mp():
    """Check that ``first()`` returns 1 when a process pool runs the jobs.

    The Context is given a four-worker ``multiprocessing.Pool`` and is
    configured with ``cloudpickle.dumps`` as serializer and
    ``pickle.loads`` as deserializer. The input list is split into three
    partitions.
    """
    p = multiprocessing.Pool(4)
    try:
        c = Context(pool=p, serializer=cloudpickle.dumps,
                    deserializer=pickle.loads)
        my_rdd = c.parallelize([1, 2, 2, 4, 1, 3, 5, 9], 3)
        # Evaluate once and reuse the value for both the debug output
        # and the assertion.
        first = my_rdd.first()
        print(first)
        assert first == 1
    finally:
        # Release the worker processes even when the assertion fails, so
        # the test does not leave child processes behind.
        p.terminate()
        p.join()
开发者ID:nicoheidtke,项目名称:pysparkling,代码行数:9,代码来源:test_multiprocessing.py

示例2: test_multiprocessing

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
def test_multiprocessing():
    """Check that ``map`` followed by ``collect`` works with a process pool.

    The Context is given a four-worker ``multiprocessing.Pool`` and uses
    ``dill.dumps`` / ``dill.loads`` as serializer and deserializer. The
    squares of ``[1, 3, 4]`` are collected and must contain 16.
    """
    p = multiprocessing.Pool(4)
    try:
        c = Context(pool=p, serializer=dill.dumps, deserializer=dill.loads)
        my_rdd = c.parallelize([1, 3, 4])
        r = my_rdd.map(lambda x: x*x).collect()
        print(r)
        assert 16 in r
    finally:
        # Release the worker processes even when the assertion fails, so
        # the test does not leave child processes behind.
        p.terminate()
        p.join()
开发者ID:gitter-badger,项目名称:pysparkling,代码行数:9,代码来源:test_multiprocessing.py

示例3: RDDTest

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
class RDDTest(unittest.TestCase):
    """Tests for RDDs (resilient distributed datasets).

    Each test builds its RDDs from ``self.context`` via ``parallelize``.
    """

    def setUp(self):
        """Create a default ``Context`` and store it on the test instance."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Check leftOuterJoin on pair RDDs whose keys are all unique.

        Every key of the left-hand RDD must appear in the result; a key
        without a partner on the right is paired with ``None``.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.leftOuterJoin(rdd_x).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('a', ('xa', None)),
            ('b', ('xb', None)),
            ('c', ('xc', 'zc')),
        ]
        expected_zx = [
            ('c', ('zc', 'xc')),
            ('d', ('zd', None)),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    def testLeftOuterJoinDuplicate(self):
        """Check leftOuterJoin when a key occurs more than once.

        Each occurrence of a duplicated left key must be joined with
        every matching value on the right.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testRightOuterJoinSimple(self):
        """Check rightOuterJoin on pair RDDs whose keys are all unique.

        Every key of the right-hand RDD must appear in the result; a key
        without a partner on the left gets ``None`` as its left value.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.rightOuterJoin(rdd_x).collect())

        expected_xy = [
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('c', ('xc', 'zc')),
            ('d', (None, 'zd')),
        ]
        expected_zx = [
            ('a', (None, 'xa')),
            ('b', (None, 'xb')),
            ('c', ('zc', 'xc')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    def testRightOuterJoinDuplicate(self):
        """Check rightOuterJoin when a key occurs more than once.

        Each occurrence of a duplicated key must be joined with every
        matching value on the other side.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())

        expected_xy = [
            ('b', (None, 'yb')),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
            ('d', (None, 'zd')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testFullOuterJoinSimple(self):
        """Test the basic full outer join with simple key-value pairs"""
        x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
        z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])

        xy = sorted(x.fullOuterJoin(y).collect())
        xz = sorted(x.fullOuterJoin(z).collect())
        zx = sorted(z.fullOuterJoin(x).collect())

        self.assertEqual(xy, [('a', ('xa', None)),
                              ('b', ('xb', 'yb')),
                              ('c', ('xc', 'yc'))])
#.........这里部分代码省略.........
开发者ID:alexprengere,项目名称:pysparkling,代码行数:103,代码来源:test_rdd.py

示例4: RDDTest

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
class RDDTest(unittest.TestCase):
    """Tests for RDDs (resilient distributed datasets).

    Each test builds its RDDs from ``self.context`` via ``parallelize``.
    """

    def setUp(self):
        """Create a default ``Context`` and store it on the test instance."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Check leftOuterJoin on pair RDDs whose keys are all unique.

        Every key of the left-hand RDD must appear in the result; a key
        without a partner on the right is paired with ``None``.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.leftOuterJoin(rdd_x).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('a', ('xa', None)),
            ('b', ('xb', None)),
            ('c', ('xc', 'zc')),
        ]
        expected_zx = [
            ('c', ('zc', 'xc')),
            ('d', ('zd', None)),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    @unittest.skip("Known failure")
    def testLeftOuterJoinDuplicate(self):
        """Check leftOuterJoin when a key occurs more than once.

        Currently skipped via ``unittest.skip`` as a known failure.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testRightOuterJoinSimple(self):
        """Check rightOuterJoin on pair RDDs whose keys are all unique.

        Every key of the right-hand RDD must appear in the result; a key
        without a partner on the left gets ``None`` as its left value.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.rightOuterJoin(rdd_x).collect())

        expected_xy = [
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('c', ('xc', 'zc')),
            ('d', (None, 'zd')),
        ]
        expected_zx = [
            ('a', (None, 'xa')),
            ('b', (None, 'xb')),
            ('c', ('zc', 'xc')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    @unittest.skip("Known failure")
    def testRightOuterJoinDuplicate(self):
        """Check rightOuterJoin when a key occurs more than once.

        Currently skipped via ``unittest.skip`` as a known failure.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())

        expected_xy = [
            ('b', (None, 'yb')),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
            ('d', (None, 'zd')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testFullOuterJoinSimple(self):
        """ Test the basic full outer join with simple key-value pairs """
        x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
        z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])

        xy = sorted(x.fullOuterJoin(y).collect())
        xz = sorted(x.fullOuterJoin(z).collect())
        zx = sorted(z.fullOuterJoin(x).collect())

        self.assertEqual(xy, [('a', ('xa', None)),
#.........这里部分代码省略.........
开发者ID:nicoheidtke,项目名称:pysparkling,代码行数:103,代码来源:test_rdd.py

示例5: print

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
                currentUser = i.username
        if(count ==5):
                break

# Console banner; the Portuguese text reads "count of lang=pt per hashtag".
print('#### quantidade de lang=pt por hashtag #####')

def inserirByTag(x):
        """Print ``x`` and insert it as one row of the ``resumebytag`` table.

        ``x[0]`` is written to the ``hashtag`` column and ``x[1]`` to the
        ``count`` column. The ``uuid`` column receives a random integer
        drawn from ``range(10000, 30000)``.
        """
        print(x)
        execute = session.execute
        statement = "insert into resumebytag (uuid, hashtag, count) values (%s, %s, %s)"
        # NOTE(review): ids drawn from a 20000-wide range can collide;
        # confirm whether the uuid column needs a truly unique value.
        parameters = (random.randrange(10000, 30000), x[0], x[1])
        execute(statement, parameters)

# Keep only the rows whose element 3 equals 'pt', then turn each kept row
# into an (element 2, 1) pair so that occurrences can be summed per key.
# NOTE(review): presumably element 3 is the language and element 2 the
# hashtag (the totals end up in the hashtag column) - confirm against the
# code that builds mostFollowersRDD.
# NOTE(review): unless shadowed earlier in the file, these are the builtin
# filter/map (not RDD methods); on Python 3 they return one-shot iterators.
testeRDD = filter(lambda x: x[3] == 'pt', mostFollowersRDD)
teste2RDD = map(lambda x: (x[2], 1), testeRDD)

sc = Context()

# Wrap the pairs in an RDD so reduceByKey can be applied.
teste3RDD = sc.parallelize(teste2RDD)

# Add up the 1s per key, then hand every (key, total) pair to inserirByTag.
teste4RDD = teste3RDD.reduceByKey(lambda accum, n: accum + n)
teste4RDD.foreach(inserirByTag)


# Console banner; the Portuguese text reads "total posts per hour of the day".
print('#### total de postagens/hora do dia #####')

def inserirByDayHour(x):
        """Print ``x`` and insert it as one row of the ``resumebydayhour`` table.

        ``x[0]`` is written to the ``dayhour`` column and ``x[1]`` to the
        ``count`` column. The ``uuid`` column receives a random integer
        drawn from ``range(10000, 30000)``.
        """
        print(x)
        execute = session.execute
        statement = "insert into resumebydayhour (uuid, dayhour, count) values (%s, %s, %s)"
        # NOTE(review): ids drawn from a 20000-wide range can collide;
        # confirm whether the uuid column needs a truly unique value.
        parameters = (random.randrange(10000, 30000), x[0], x[1])
        execute(statement, parameters)

# Wrap the unfiltered rows in an RDD for the per-hour summary.
# NOTE(review): mostFollowersRDD was already iterated by the filter/map
# pipeline above; if it is a one-shot iterator rather than a list it will
# be empty here - confirm its type where it is created.
teste5RDD = sc.parallelize(mostFollowersRDD)

def agruparDate(x):
        diaHora = '{:%Y-%m-%d %H}'.format(x[1])
开发者ID:laerciowadie,项目名称:testeappSumarizador,代码行数:33,代码来源:sumarizador.py

示例6: test_union

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
def test_union():
    """Check that ``Context.union`` combines the elements of two RDDs.

    Two single-element RDDs are merged; the collected result must hold
    exactly two items, "Hello" and "World".
    """
    context = Context()
    hello_rdd = context.parallelize(["Hello"])
    world_rdd = context.parallelize(["World"])
    combined = context.union([hello_rdd, world_rdd]).collect()
    # One assert per condition, checked in the same order as before.
    assert len(combined) == 2
    assert "Hello" in combined
    assert "World" in combined
开发者ID:nicoheidtke,项目名称:pysparkling,代码行数:8,代码来源:test_context_unit.py

示例7: RDDTest

# Required import: from pysparkling import Context [as alias]
# Note: parallelize is a method of Context objects, so it is called on an instance (it cannot be imported separately).
class RDDTest(unittest.TestCase):
    """Tests for RDDs (resilient distributed datasets).

    Each test builds its RDDs from ``self.context`` via ``parallelize``.
    """

    def setUp(self):
        """Create a default ``Context`` and store it on the test instance."""
        self.context = Context()

    def testLeftOuterJoinSimple(self):
        """Check leftOuterJoin on pair RDDs whose keys are all unique.

        Every key of the left-hand RDD must appear in the result; a key
        without a partner on the right is paired with ``None``.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.leftOuterJoin(rdd_x).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('a', ('xa', None)),
            ('b', ('xb', None)),
            ('c', ('xc', 'zc')),
        ]
        expected_zx = [
            ('c', ('zc', 'xc')),
            ('d', ('zd', None)),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    def testLeftOuterJoinDuplicate(self):
        """Check leftOuterJoin when a key occurs more than once.

        Each occurrence of a duplicated left key must be joined with
        every matching value on the right.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.leftOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.leftOuterJoin(rdd_z).collect())

        expected_xy = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('a', ('xa', None)),
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testRightOuterJoinSimple(self):
        """Check rightOuterJoin on pair RDDs whose keys are all unique.

        Every key of the right-hand RDD must appear in the result; a key
        without a partner on the left gets ``None`` as its left value.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc'), ('d', 'zd')])

        # Results are sorted so the comparisons do not rely on the order
        # in which collect() returns the pairs.
        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())
        z_join_x = sorted(rdd_z.rightOuterJoin(rdd_x).collect())

        expected_xy = [
            ('b', ('xb', 'yb')),
            ('c', ('xc', 'yc')),
        ]
        expected_xz = [
            ('c', ('xc', 'zc')),
            ('d', (None, 'zd')),
        ]
        expected_zx = [
            ('a', (None, 'xa')),
            ('b', (None, 'xb')),
            ('c', ('zc', 'xc')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)
        self.assertEqual(z_join_x, expected_zx)

    def testRightOuterJoinDuplicate(self):
        """Check rightOuterJoin when a key occurs more than once.

        Each occurrence of a duplicated key must be joined with every
        matching value on the other side.
        """
        parallelize = self.context.parallelize
        rdd_x = parallelize([('a', 'xa'), ('c', 'xc1'), ('c', 'xc2')])
        rdd_y = parallelize([('b', 'yb'), ('c', 'yc')])
        rdd_z = parallelize([('c', 'zc1'), ('c', 'zc2'), ('d', 'zd')])

        x_join_y = sorted(rdd_x.rightOuterJoin(rdd_y).collect())
        x_join_z = sorted(rdd_x.rightOuterJoin(rdd_z).collect())

        expected_xy = [
            ('b', (None, 'yb')),
            ('c', ('xc1', 'yc')),
            ('c', ('xc2', 'yc')),
        ]
        # Key 'c' is duplicated on both sides, so the join must yield the
        # cartesian product of the matching values (2 x 2 pairs).
        expected_xz = [
            ('c', ('xc1', 'zc1')),
            ('c', ('xc1', 'zc2')),
            ('c', ('xc2', 'zc1')),
            ('c', ('xc2', 'zc2')),
            ('d', (None, 'zd')),
        ]

        self.assertEqual(x_join_y, expected_xy)
        self.assertEqual(x_join_z, expected_xz)

    def testFullOuterJoinSimple(self):
        """ Test the basic full outer join with simple key-value pairs """
        x = self.context.parallelize([('a', 'xa'), ('b', 'xb'), ('c', 'xc')])
        y = self.context.parallelize([('b', 'yb'), ('c', 'yc')])
        z = self.context.parallelize([('c', 'zc'), ('d', 'zd')])

        xy = sorted(x.fullOuterJoin(y).collect())
        xz = sorted(x.fullOuterJoin(z).collect())
        zx = sorted(z.fullOuterJoin(x).collect())

        self.assertEqual(xy, [('a', ('xa', None)),
                              ('b', ('xb', 'yb')),
                              ('c', ('xc', 'yc'))])
#.........这里部分代码省略.........
开发者ID:ainkov,项目名称:pysparkling,代码行数:103,代码来源:test_rdd.py


注:本文中的pysparkling.Context.parallelize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。