

Python random.RandomRDDs Class Code Examples

This article compiles typical usage examples of the Python class pyspark.mllib.random.RandomRDDs. If you have been wondering what the RandomRDDs class is for, how to use it, or what working examples look like, the hand-picked code examples below may help.


The following presents 8 code examples of the RandomRDDs class, ordered by popularity by default.
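As a quick orientation before the examples, here is a minimal sketch of the class's most common factory methods. It assumes a running SparkContext named `sc` (for example from the pyspark shell); the sizes and seeds are arbitrary.

from pyspark.mllib.random import RandomRDDs

# assumes an existing SparkContext `sc`, e.g. from the pyspark shell

# 100 i.i.d. samples from the uniform distribution U(0, 1), as an RDD of floats
uniform = RandomRDDs.uniformRDD(sc, 100, seed=42)

# 100 i.i.d. samples from the standard normal distribution N(0, 1)
normal = RandomRDDs.normalRDD(sc, 100, seed=42)

# a 100 x 3 matrix of standard-normal values, as an RDD of vectors
vectors = RandomRDDs.normalVectorRDD(sc, numRows=100, numCols=3, seed=42)

print(uniform.take(3))
print(normal.take(3))
print(vectors.take(1))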

Example 1: test_col_norms

    # Requires (from the surrounding test module):
    #   from pyspark.mllib.random import RandomRDDs
    #   from pyspark.mllib.stat import Statistics
    #   from pyspark.mllib.linalg import Vectors
    #   from numpy import array
    def test_col_norms(self):
        # 1000 random vectors of length 10: both norms should have one entry per column
        data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
        summary = Statistics.colStats(data)
        self.assertEqual(10, len(summary.normL1()))
        self.assertEqual(10, len(summary.normL2()))

        # a single-column RDD with known values 0..9: check the norms exactly
        data2 = self.sc.parallelize(range(10)).map(lambda x: Vectors.dense(x))
        summary2 = Statistics.colStats(data2)
        self.assertEqual(array([45.0]), summary2.normL1())
        import math
        expectedNormL2 = math.sqrt(sum(map(lambda x: x*x, range(10))))
        self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14)
Developer: HodaAlemi, Project: spark, Lines of code: 12, Source file: tests.py
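The test above exercises Statistics.colStats from inside a test class; outside of a test, the same summary object can be inspected directly. A minimal standalone sketch, assuming an existing SparkContext `sc`:

from pyspark.mllib.random import RandomRDDs
from pyspark.mllib.stat import Statistics

# 1000 rows x 10 columns of standard-normal values
data = RandomRDDs.normalVectorRDD(sc, 1000, 10, seed=1)
summary = Statistics.colStats(data)

print(summary.count())     # 1000
print(summary.mean())      # per-column means, close to 0
print(summary.variance())  # per-column variances, close to 1
print(summary.normL1())    # per-column L1 norms
print(summary.normL2())    # per-column L2 norms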

Example 2: test_col_with_different_rdds

    # Requires: import array as pyarray  (plus RandomRDDs and Statistics as in Example 1)
    def test_col_with_different_rdds(self):
        # numpy-backed vectors from RandomRDDs
        data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
        summary = Statistics.colStats(data)
        self.assertEqual(1000, summary.count())
        # plain Python sequences
        data = self.sc.parallelize([range(10)] * 10)
        summary = Statistics.colStats(data)
        self.assertEqual(10, summary.count())
        # array.array values
        data = self.sc.parallelize([pyarray.array("d", range(10))] * 10)
        summary = Statistics.colStats(data)
        self.assertEqual(10, summary.count())
Developer: greatyan, Project: spark, Lines of code: 13, Source file: tests.py

Example 3: exec

# A script to run k-means clustering in Spark
# to run, enter in the pyspark shell: >>> exec(open("./dokmeans.py").read())

import numpy as np
from pyspark.mllib.linalg import Vectors
from pyspark.mllib.clustering import KMeans


# we need this package to generate random data RDDs
from pyspark.mllib.random import RandomRDDs

# let's generate random cluster data by adding a cluster-center offset to random 2D points

# use the default number of partitions, or a definite number to make it so that the union
#   will have samples across clusters
c1_v=RandomRDDs.normalVectorRDD(sc,20,2,numPartitions=2,seed=1L).map(lambda v:np.add([1,5],v))
c2_v=RandomRDDs.normalVectorRDD(sc,16,2,numPartitions=2,seed=2L).map(lambda v:np.add([5,1],v))
c3_v=RandomRDDs.normalVectorRDD(sc,12,2,numPartitions=2,seed=3L).map(lambda v:np.add([4,6],v))

# concatenate two RDDs with the .union(other) function
c12    =c1_v.union(c2_v)
my_data=c12.union(c3_v)   #this now has all points, as RDD


my_kmmodel = KMeans.train(my_data,k=1,
               maxIterations=20,runs=1,
               initializationMode='k-means||',seed=10L)

# try help(KMeans.train) to see the parameter options
#k is the number of desired clusters.
#maxIterations is the maximum number of iterations to run.
Developer: abhijeetk, Project: Big-Data-Specialization, Lines of code: 31, Source file: dokmeans.py
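Since the script generates three clusters centered near (1, 5), (5, 1) and (4, 6), a natural follow-up is to train with k=3 and inspect the fitted model. A hedged sketch, reusing my_data from the script above (seeds written without the Python-2-only L suffix):

# train with k=3 so the model can recover the three generated clusters
model = KMeans.train(my_data, k=3, maxIterations=20,
                     initializationMode='k-means||', seed=10)

print(model.clusterCenters)        # list of 3 center vectors (numpy arrays)
print(model.predict([1.0, 5.0]))   # index of the center closest to a new point
print(model.computeCost(my_data))  # sum of squared distances to the nearest centers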

Example 4: len

from __future__ import print_function

import sys

from pyspark import SparkContext
from pyspark.mllib.random import RandomRDDs


if __name__ == "__main__":
    if len(sys.argv) not in [1, 2]:
        print("Usage: random_rdd_generation", file=sys.stderr)
        sys.exit(-1)

    sc = SparkContext(appName="PythonRandomRDDGeneration")

    numExamples = 10000  # number of examples to generate
    fraction = 0.1  # fraction of data to sample

    # Example: RandomRDDs.normalRDD
    normalRDD = RandomRDDs.normalRDD(sc, numExamples)
    print('Generated RDD of %d examples sampled from the standard normal distribution'
          % normalRDD.count())
    print('  First 5 samples:')
    for sample in normalRDD.take(5):
        print('    ' + str(sample))
    print()

    # Example: RandomRDDs.normalVectorRDD
    normalVectorRDD = RandomRDDs.normalVectorRDD(sc, numRows=numExamples, numCols=2)
    print('Generated RDD of %d examples of length-2 vectors.' % normalVectorRDD.count())
    print('  First 5 samples:')
    for sample in normalVectorRDD.take(5):
        print('    ' + str(sample))
    print()
Developer: BaiBenny, Project: spark, Lines of code: 30, Source file: random_rdd_generation.py
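The snippet above defines fraction but the portion shown never uses it; in the full Spark example the generated RDD is sampled afterwards. A rough sketch of that step, continuing inside the same __main__ block with the normalRDD and fraction defined above:

    # take an approximate 10% sample of the normal RDD, with replacement
    sampled = normalRDD.sample(withReplacement=True, fraction=fraction)
    print('  Sampled %d examples' % sampled.count())

    # stop the context when the script is done
    sc.stop()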

Example 5: test_to_java_object_rdd

    # _to_java_object_rdd is the private helper from pyspark.mllib.common
    def test_to_java_object_rdd(self):  # SPARK-6660
        data = RandomRDDs.uniformRDD(self.sc, 10, 5, seed=0)
        self.assertEqual(_to_java_object_rdd(data).count(), 10)
Developer: HodaAlemi, Project: spark, Lines of code: 3, Source file: tests.py

Example 6:

""" Simple distributed implementation of the K-Means algorithm using Tensorflow.
"""

import tensorflow as tf
import tensorframes as tfs
from pyspark.mllib.random import RandomRDDs
import numpy as np

num_features = 4
k = 2
# TODO: does not work with 1
# assumes an existing SparkContext `sc` and SQLContext `sqlContext`
# (e.g. a pyspark shell launched with the tensorframes package)
data = RandomRDDs.normalVectorRDD(
    sc,
    numCols=num_features,
    numRows=100,
    seed=1).map(lambda v: [v.tolist()])
df = sqlContext.createDataFrame(data).toDF("features")

# For now, analysis is still required.
df0 = tfs.analyze(df)

init_centers = np.random.randn(k, num_features)

# For debugging: a small numpy block of 10 generated points (shape 10 x num_features)
block = np.array(data.take(10))[::,0,::]

# Find the distances first
with tf.Graph().as_default() as g:
    points = tf.placeholder(tf.double, shape=[None, num_features], name='points')
    num_points = tf.shape(points)[0]
    #centers = tf.placeholder(tf.double, shape=[k, num_features], name='centers')
Developer: Jay-Zeng, Project: tensorframes, Lines of code: 31, Source file: kmeans.py
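The graph construction is truncated here (the centers placeholder is still commented out). For intuition, the assignment step that the distance graph is presumably meant to compute can be sketched in plain NumPy against the debugging block and the init_centers defined above; this is only an illustration, not part of the original script:

# squared Euclidean distance from each of the 10 debug points to each center
diffs = block[:, None, :] - init_centers[None, :, :]  # shape (10, k, num_features)
sq_dists = (diffs ** 2).sum(axis=-1)                  # shape (10, k)
assignments = np.argmin(sq_dists, axis=1)             # nearest-center index per point
print(assignments)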

Example 7: SparkContext

"""

Testing with Random data generation

https://spark.apache.org/docs/latest/mllib-statistics.html

"""

from pyspark.mllib.random import RandomRDDs
from pyspark import SparkContext

sc = SparkContext("local", "Rubbish")

# Generate a random double RDD that contains 1 million i.i.d. values drawn uniformly
# from `U(0, 1)`, evenly distributed in 10 partitions.
u = RandomRDDs.uniformRDD(sc, 1000000L, 10)
# Apply an affine transform; for uniform `x` this yields values distributed as `U(1, 3)`
# (the `N(1, 4)` recipe from the Spark statistics guide starts from normalRDD instead).
v = u.map(lambda x: 1.0 + 2.0 * x)

# print a few of the transformed values; `print v` alone only shows the RDD object
print v.take(5)
Developer: jjingrong, Project: Spark-MLlib, Lines of code: 20, Source file: Random_data_generation.py
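If the goal really is values following N(1, 4), as in the Spark statistics guide, the generator should be normalRDD rather than uniformRDD. A sketch of that variant (written without the Python-2-only long-literal suffix):

# 1 million i.i.d. standard-normal values in 10 partitions
u = RandomRDDs.normalRDD(sc, 1000000, 10)
# shift and scale: if u ~ N(0, 1) then 1 + 2*u ~ N(1, 4)
v = u.map(lambda x: 1.0 + 2.0 * x)
print(v.take(5))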

Example 8: test_col_norms

    def test_col_norms(self):
        data = RandomRDDs.normalVectorRDD(self.sc, 1000, 10, 10)
        summary = Statistics.colStats(data)
        self.assertEqual(10, len(summary.normL1()))
        self.assertEqual(10, len(summary.normL2()))
Developer: Amir-Github, Project: spark, Lines of code: 5, Source file: tests.py


Note: The pyspark.mllib.random.RandomRDDs class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are selected from open-source projects contributed by their respective developers, and copyright for the source code remains with the original authors. For distribution and use, please refer to each project's License; do not reproduce without permission.