Python linalg._convert_to_vector函数代码示例

本文整理汇总了Python中pyspark.mllib.linalg._convert_to_vector函数的典型用法代码示例。如果您正苦于以下问题：Python _convert_to_vector函数的具体用法？Python _convert_to_vector怎么用？Python _convert_to_vector使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了_convert_to_vector函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: appendBias

 def appendBias(data):
     """
     Append a bias entry to a vector.

     `data` is first converted with `_convert_to_vector`.  The result is
     a new vector one element longer than the input whose last entry is
     `1.0`.  A `SparseVector` input produces a `SparseVector`; any other
     vector goes through its dense array form.
     """
     vector = _convert_to_vector(data)
     if not isinstance(vector, SparseVector):
         dense = np.append(vector.toArray(), 1.0)
         return _convert_to_vector(dense)
     size = len(vector)
     # The bias lives in the new last slot, whose index equals the old size.
     indices = np.append(vector.indices, size)
     values = np.append(vector.values, 1.0)
     return SparseVector(size + 1, indices, values)

开发者ID:AsafZ，项目名称:spark，代码行数:12，代码来源:util.py

示例2: _regression_train_wrapper

def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Run `train_func` on `data` and wrap its result in `modelClass`.

    :param train_func:      Callable invoked as
                            `train_func(data, initial_weight_vector)`.
    :param modelClass:      Class used to build the returned model.
    :param data:            RDD whose elements must be LabeledPoint.
    :param initial_weights: Starting weights, or None to start from a
                            zero vector with one entry per feature.
    :return:                `modelClass(...)` built from what
                            `train_func` returned.
    :raises TypeError:      If the first element of `data` is not a
                            LabeledPoint.
    """
    # NOTE(review): imported locally, presumably to avoid a circular
    # import between the regression and classification modules - confirm.
    from pyspark.mllib.classification import LogisticRegressionModel
    first = data.first()
    if not isinstance(first, LabeledPoint):
        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
    if initial_weights is None:
        # Reuse the element fetched above; calling data.first() again
        # would launch a second job just to read the feature count.
        initial_weights = [0.0] * len(first.features)
    weight_vector = _convert_to_vector(initial_weights)
    if modelClass == LogisticRegressionModel:
        # For logistic regression train_func returns two extra values.
        weights, intercept, numFeatures, numClasses = train_func(data, weight_vector)
        return modelClass(weights, intercept, numFeatures, numClasses)
    weights, intercept = train_func(data, weight_vector)
    return modelClass(weights, intercept)

开发者ID:BeforeRain，项目名称:spark，代码行数:14，代码来源:regression.py

示例3: update

    def update(self, data, decayFactor, timeUnit):
        """Refresh the centroids from a new batch of data.

        :param data:
          RDD holding the new points used for the model update.
        :param decayFactor:
          How strongly the previous centroids are forgotten.
        :param timeUnit:
          Either "batches" or "points". With "points" the decay factor
          is raised to the power of the number of new points; with
          "batches" the decay factor is used as is.
        :return:
          This model, after `centers` and `_clusterWeights` were replaced.
        """
        if not isinstance(data, RDD):
            raise TypeError("Data should be of an RDD, got %s." % type(data))
        points = data.map(_convert_to_vector)
        decay = float(decayFactor)
        if timeUnit not in ("batches", "points"):
            raise ValueError(
                "timeUnit should be 'batches' or 'points', got %s." % timeUnit)
        current_centers = list(map(_convert_to_vector, self.centers))
        result = callMLlibFunc(
            "updateStreamingKMeansModel",
            current_centers,
            self._clusterWeights,
            points,
            decay,
            timeUnit,
        )
        # result[0] carries the new centers, result[1] the new weights.
        self.centers = array(result[0])
        self._clusterWeights = list(result[1])
        return self

开发者ID:11wzy001，项目名称:spark，代码行数:26，代码来源:clustering.py

示例4: _regression_train_wrapper

def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Run `train_func` on `data` and wrap the result in `modelClass`.

    :param train_func:      Callable invoked as
                            `train_func(data, initial_weight_vector)`;
                            must return `(weights, intercept)`.
    :param modelClass:      Class called as `modelClass(weights, intercept)`.
    :param data:            RDD whose elements must be LabeledPoint.
    :param initial_weights: Starting weights.  None or an empty sequence
                            means a zero vector with one entry per feature.
    :return:                The model built by `modelClass`.
    :raises ValueError:     If the first element of `data` is not a
                            LabeledPoint.
    """
    first = data.first()
    if not isinstance(first, LabeledPoint):
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % first)
    # `initial_weights or default` cannot be used here: the truth value of
    # a multi-element numpy array is ambiguous and raises ValueError.
    if initial_weights is None or len(initial_weights) == 0:
        # Reuse `first` rather than running data.first() a second time.
        initial_weights = [0.0] * len(first.features)
    weights, intercept = train_func(data, _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)

开发者ID:dnprock，项目名称:spark，代码行数:7，代码来源:regression.py

示例5: predict

    def predict(self, x):
        """
        Predict the label of one or more examples.

        :param x:  Data point (feature vector),
                   or an RDD of data points (feature vectors).
        :return:   For an RDD, an RDD built from the Java model's
                   predictions (an empty RDD when `x` has no elements);
                   otherwise whatever the Java model's `predict` returns
                   for the single point.
        """
        ser = PickleSerializer()
        if isinstance(x, RDD):
            # Bulk prediction
            first = x.take(1)
            if not first:
                return self._sc.parallelize([])
            # Only the first element is inspected to decide whether the
            # RDD still needs converting to vectors.
            if not isinstance(first[0], Vector):
                x = x.map(_convert_to_vector)
            jPred = self._java_model.predict(x._to_java_object_rdd()).toJavaRDD()
            jpyrdd = self._sc._jvm.PythonRDD.javaToPython(jPred)
            return RDD(jpyrdd, self._sc, BatchedSerializer(ser, 1024))

        # Assume x is a single data point: pickle it and let the JVM-side
        # SerDe load it into a Java object.
        payload = bytearray(ser.dumps(_convert_to_vector(x)))
        vec = self._sc._jvm.SerDe.loads(payload)
        return self._java_model.predict(vec)

开发者ID:312268112，项目名称:spark，代码行数:25，代码来源:tree.py

示例6: predictSoft

    def predictSoft(self, x):
        """
        Compute the membership of a point, or of every point in an RDD,
        to all mixture components.

        :param x:    a vector, or an RDD of vectors, holding data points.
        :return:     the membership values to all mixture components for
                     vector 'x', or for each vector in RDD 'x'.
        """
        if not isinstance(x, RDD):
            return self.call("predictSoft", _convert_to_vector(x)).toArray()

        # Split the per-component (mu, sigma) pairs into two parallel tuples.
        means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians])
        points = x.map(_convert_to_vector)
        weights = _convert_to_vector(self.weights)
        memberships = callMLlibFunc("predictSoftGMM", points, weights, means, sigmas)
        return memberships.map(lambda row: pyarray.array("d", row))

开发者ID:Raynes，项目名称:spark，代码行数:16，代码来源:clustering.py

示例7: predict

 def predict(self, x):
     """
     Compute the model's prediction for a single point.

     `x` holds the values of the independent variables; the result is
     the dot product of the weights with `x`, plus the intercept.
     """
     features = _convert_to_vector(x)
     prediction = self.weights.dot(features) + self.intercept
     return prediction

开发者ID:31z4，项目名称:spark，代码行数:7，代码来源:regression.py

示例8: train

 def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||",
           seed=None, initializationSteps=5, epsilon=1e-4, initialModel=None):
     """
     Train a k-means clustering model.

     Every element of `rdd` is converted with `_convert_to_vector` and
     handed, together with the remaining arguments, to the
     "trainKMeansModel" MLlib function.  When `initialModel` is given,
     its cluster centers are passed along as well; it must be a
     KMeansModel, otherwise an Exception is raised.
     """
     if initialModel is None:
         initial_centers = []
     elif isinstance(initialModel, KMeansModel):
         initial_centers = [_convert_to_vector(c) for c in initialModel.clusterCenters]
     else:
         raise Exception(
             "initialModel is of " + str(type(initialModel))
             + ". It needs to be of <type 'KMeansModel'>"
         )
     points = rdd.map(_convert_to_vector)
     java_model = callMLlibFunc(
         "trainKMeansModel", points, k, maxIterations, runs, initializationMode,
         seed, initializationSteps, epsilon, initial_centers)
     java_centers = callJavaFunc(rdd.context, java_model.clusterCenters)
     return KMeansModel([center.toArray() for center in java_centers])

开发者ID:BeforeRain，项目名称:spark，代码行数:34，代码来源:clustering.py

示例9: save

 def save(self, sc, path):
     """
     Persist this model at `path`.

     The centers are converted to vectors, wrapped in a JVM-side
     KMeansModel, and saved through that Java object.
     """
     center_vectors = list(map(_convert_to_vector, self.centers))
     java_centers = _py2java(sc, center_vectors)
     clustering_pkg = sc._jvm.org.apache.spark.mllib.clustering
     clustering_pkg.KMeansModel(java_centers).save(sc._jsc.sc(), path)

开发者ID:11wzy001，项目名称:spark，代码行数:7，代码来源:clustering.py

示例10: computeCost

 def computeCost(self, rdd):
     """
     Compute the K-means cost of this model on `rdd`, i.e. the sum of
     squared distances from each point to its nearest center.
     """
     points = rdd.map(_convert_to_vector)
     centers = list(map(_convert_to_vector, self.centers))
     return callMLlibFunc("computeCostKmeansModel", points, centers)

开发者ID:GuoNing89，项目名称:Study，代码行数:8，代码来源:clustering.py

示例11: test_serialize

    def test_serialize(self):
        """Check conversion and serialization round trips of scipy sparse
        column matrices against the equivalent SparseVector."""
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        expected = SparseVector(4, {1: 1, 3: 2})

        # Direct conversion, for the LIL matrix and each other format.
        self.assertEqual(expected, _convert_to_vector(lil))
        for to_format in (lil.tocsc, lil.tocoo, lil.tocsr, lil.todok):
            self.assertEqual(expected, _convert_to_vector(to_format()))

        def serialize(l):
            converted = _convert_to_vector(l)
            return ser.loads(ser.dumps(converted))

        # Round trip through the serializer (COO is not exercised here).
        self.assertEqual(expected, serialize(lil))
        for to_format in (lil.tocsc, lil.tocsr, lil.todok):
            self.assertEqual(expected, serialize(to_format()))

开发者ID:drewrobb，项目名称:spark，代码行数:18，代码来源:test_linalg.py

示例12: predict

 def predict(self, x):
     """
     Compute the model's prediction for one vector, or for every vector
     in an RDD, of independent-variable values.
     """
     if not isinstance(x, RDD):
         features = _convert_to_vector(x)
         return self.weights.dot(features) + self.intercept
     # RDD input: apply this same method to each element.
     return x.map(self.predict)

开发者ID:BeforeRain，项目名称:spark，代码行数:9，代码来源:regression.py

示例13: setInitialWeights

    def setInitialWeights(self, initialWeights):
        """
        Install the starting weights for the model.

        Call this before running trainOn and predictOn.  The underlying
        model is created with the given weights and an intercept of 0.
        Returns this object.
        """
        self._model = LinearRegressionModel(_convert_to_vector(initialWeights), 0)
        return self

开发者ID:BeforeRain，项目名称:spark，代码行数:9，代码来源:regression.py

示例14: predict

 def predict(self, x):
     """
     Pick the most likely class for a data vector, or for each vector
     of an RDD.
     """
     if not isinstance(x, RDD):
         features = _convert_to_vector(x)
         scores = self.pi + features.dot(self.theta.transpose())
         # The label at the position of the highest score wins.
         best = numpy.argmax(scores)
         return self.labels[best]
     return x.map(lambda v: self.predict(v))

开发者ID:vijaykiran，项目名称:spark，代码行数:9，代码来源:classification.py

示例15: predict_all

 def predict_all(self, x):
     """
     Rank every label for a data vector, or for each vector of an RDD.

     Returns a list of (label as int, scaled score) pairs sorted by
     score, highest first.  The raw scores are passed through `scale`
     before ranking.
     """
     if isinstance(x, RDD):
         return x.map(lambda v: self.predict_all(v))
     features = _convert_to_vector(x)
     raw_scores = self.pi + features.dot(self.theta.transpose())
     scaled_scores = scale(raw_scores)
     int_labels = [int(label) for label in self.labels]
     return sorted(
         zip(int_labels, scaled_scores),
         key=lambda pair: pair[1],
         reverse=True,
     )

开发者ID:AlexFridman，项目名称:Multi-label-classification-with-spark，代码行数:9，代码来源:mlbayes.py

注：本文中的pyspark.mllib.linalg._convert_to_vector函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。