本文整理汇总了Python中pyspark.mllib.linalg._convert_to_vector函数的典型用法代码示例。如果您正苦于以下问题：Python _convert_to_vector函数的具体用法？Python _convert_to_vector怎么用？Python _convert_to_vector使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了_convert_to_vector函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: appendBias
def appendBias(data):
    """
    Return a new vector equal to the input with `1.0` (bias) appended.

    The input is first normalized with `_convert_to_vector`. A
    `SparseVector` input produces a `SparseVector` that is one element
    longer; any other vector is expanded with `toArray()`, extended, and
    converted back through `_convert_to_vector`.
    """
    vector = _convert_to_vector(data)
    if not isinstance(vector, SparseVector):
        extended = np.append(vector.toArray(), 1.0)
        return _convert_to_vector(extended)
    size = len(vector)
    # The bias goes into the new last slot, whose index is the old length.
    bias_indices = np.append(vector.indices, size)
    bias_values = np.append(vector.values, 1.0)
    return SparseVector(size + 1, bias_indices, bias_values)
示例2: _regression_train_wrapper
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Run `train_func` on `data` and wrap its result in `modelClass`.

    :param train_func: Callable invoked as `train_func(data, weights)`,
      where `weights` is `initial_weights` after `_convert_to_vector`.
      For `LogisticRegressionModel` it must return
      `(weights, intercept, numFeatures, numClasses)`; for any other
      model class it must return `(weights, intercept)`.
    :param modelClass: Class instantiated with the values returned by
      `train_func`.
    :param data: Object exposing `first()`; its first element must be a
      `LabeledPoint`.
    :param initial_weights: Starting weights, or None to start from
      zeros (one per feature of the first element).
    :return: An instance of `modelClass`.
    :raises TypeError: If the first element of `data` is not a
      `LabeledPoint`.
    """
    from pyspark.mllib.classification import LogisticRegressionModel

    first = data.first()
    if not isinstance(first, LabeledPoint):
        raise TypeError("data should be an RDD of LabeledPoint, but got %s" % type(first))
    if initial_weights is None:
        # Reuse the element fetched above instead of calling first() again.
        initial_weights = [0.0] * len(first.features)
    weights_vector = _convert_to_vector(initial_weights)
    if modelClass == LogisticRegressionModel:
        weights, intercept, numFeatures, numClasses = train_func(data, weights_vector)
        return modelClass(weights, intercept, numFeatures, numClasses)
    weights, intercept = train_func(data, weights_vector)
    return modelClass(weights, intercept)
示例3: update
def update(self, data, decayFactor, timeUnit):
    """Update the centroids and cluster weights according to new data.

    :param data:
      RDD with new data for the model update.
    :param decayFactor:
      Forgetfulness of the previous centroids (converted with `float`).
    :param timeUnit:
      Can be "batches" or "points". If points, then the decay factor
      is raised to the power of number of new points and if batches,
      then decay factor will be used as is.
    :return: This model, to allow call chaining.
    :raises TypeError: If `data` is not an RDD.
    :raises ValueError: If `timeUnit` is neither "batches" nor "points".
    """
    if not isinstance(data, RDD):
        raise TypeError("Data should be of an RDD, got %s." % type(data))
    vectors = data.map(_convert_to_vector)
    decay = float(decayFactor)
    if timeUnit not in ("batches", "points"):
        raise ValueError(
            "timeUnit should be 'batches' or 'points', got %s." % timeUnit)
    current_centers = list(map(_convert_to_vector, self.centers))
    result = callMLlibFunc(
        "updateStreamingKMeansModel",
        current_centers,
        self._clusterWeights,
        vectors,
        decay,
        timeUnit,
    )
    # The result carries the new centers first and the new weights second.
    self.centers = array(result[0])
    self._clusterWeights = list(result[1])
    return self
示例4: _regression_train_wrapper
def _regression_train_wrapper(train_func, modelClass, data, initial_weights):
    """
    Run `train_func` on `data` and wrap its result in `modelClass`.

    :param train_func: Callable invoked as `train_func(data, weights)`,
      where `weights` is `initial_weights` after `_convert_to_vector`;
      it must return `(weights, intercept)`.
    :param modelClass: Class instantiated as
      `modelClass(weights, intercept)`.
    :param data: Object exposing `first()`; its first element must be a
      `LabeledPoint`.
    :param initial_weights: Starting weights. None or an empty sequence
      means zeros (one per feature of the first element).
    :return: An instance of `modelClass`.
    :raises ValueError: If the first element of `data` is not a
      `LabeledPoint`.
    """
    first = data.first()
    if not isinstance(first, LabeledPoint):
        # Wrap in a 1-tuple so a tuple-valued element is formatted as a
        # whole instead of being unpacked into several format arguments.
        raise ValueError("data should be an RDD of LabeledPoint, but got %s" % (first,))
    # Test explicitly for "missing or empty": plain truthiness raises on
    # multi-element NumPy arrays and would discard an array holding [0.0].
    if initial_weights is None or len(initial_weights) == 0:
        # Reuse the element fetched above instead of calling first() again.
        initial_weights = [0.0] * len(first.features)
    weights, intercept = train_func(data, _convert_to_vector(initial_weights))
    return modelClass(weights, intercept)
示例5: predict
def predict(self, x):
    """
    Predict the label of one or more examples.

    :param x: Data point (feature vector),
              or an RDD of data points (feature vectors).
    :return: For an RDD input, an RDD built from the Java model's
             predictions (an empty RDD when the input has no elements);
             otherwise whatever the Java model's `predict` returns for
             the single converted point.
    """
    ser = PickleSerializer()
    if isinstance(x, RDD):
        # Bulk prediction
        first = x.take(1)
        if not first:
            return self._sc.parallelize([])
        # Only map the conversion over the RDD when the elements are not
        # already Vector instances.
        if not isinstance(first[0], Vector):
            x = x.map(_convert_to_vector)
        jPred = self._java_model.predict(x._to_java_object_rdd()).toJavaRDD()
        jpyrdd = self._sc._jvm.PythonRDD.javaToPython(jPred)
        return RDD(jpyrdd, self._sc, BatchedSerializer(ser, 1024))
    # Assume x is a single data point.
    pickled = bytearray(ser.dumps(_convert_to_vector(x)))
    vec = self._sc._jvm.SerDe.loads(pickled)
    return self._java_model.predict(vec)
def predictSoft(self, x):
    """
    Find the membership of point 'x' or each point in RDD 'x' to all mixture components.

    :param x: vector or RDD of vectors representing data points.
    :return: the membership values to all mixture components for vector 'x'
             or for each vector in RDD 'x'.
    """
    if not isinstance(x, RDD):
        return self.call("predictSoft", _convert_to_vector(x)).toArray()
    # Split the per-component (mu, sigma) pairs into two parallel tuples.
    components = [(g.mu, g.sigma) for g in self.gaussians]
    means, sigmas = zip(*components)
    points = x.map(_convert_to_vector)
    weights = _convert_to_vector(self.weights)
    memberships = callMLlibFunc("predictSoftGMM", points, weights, means, sigmas)
    return memberships.map(lambda row: pyarray.array("d", row))
示例7: predict
def predict(self, x):
    """
    Predict the value of the dependent variable for a single vector x
    of independent-variable values.

    The input is converted with `_convert_to_vector`; the result is the
    dot product of the model weights with it, plus the intercept.
    """
    features = _convert_to_vector(x)
    weighted_sum = self.weights.dot(features)
    return weighted_sum + self.intercept
示例8: train
def train(
    cls,
    rdd,
    k,
    maxIterations=100,
    runs=1,
    initializationMode="k-means||",
    seed=None,
    initializationSteps=5,
    epsilon=1e-4,
    initialModel=None,
):
    """Train a k-means clustering model.

    Every element of `rdd` is converted with `_convert_to_vector` and the
    training itself is delegated to the "trainKMeansModel" MLlib function.
    When `initialModel` is given it must be a `KMeansModel`; its cluster
    centers are forwarded as the initial model, otherwise an empty list
    is forwarded.

    :return: A `KMeansModel` built from the trained cluster centers.
    :raises Exception: If `initialModel` is not None and not a
      `KMeansModel`.
    """
    if initialModel is None:
        initial_centers = []
    elif isinstance(initialModel, KMeansModel):
        initial_centers = list(map(_convert_to_vector, initialModel.clusterCenters))
    else:
        raise Exception(
            "initialModel is of " + str(type(initialModel)) + ". It needs to be of <type 'KMeansModel'>"
        )
    vectors = rdd.map(_convert_to_vector)
    java_model = callMLlibFunc(
        "trainKMeansModel",
        vectors,
        k,
        maxIterations,
        runs,
        initializationMode,
        seed,
        initializationSteps,
        epsilon,
        initial_centers,
    )
    trained_centers = callJavaFunc(rdd.context, java_model.clusterCenters)
    return KMeansModel([center.toArray() for center in trained_centers])
示例9: save
def save(self, sc, path):
    """
    Save this model to the given path.

    The centers are converted with `_convert_to_vector`, handed to the
    JVM through `_py2java`, wrapped in a JVM-side `KMeansModel`, and that
    object's `save` is called with the underlying Spark context and `path`.
    """
    center_vectors = [_convert_to_vector(center) for center in self.centers]
    jvm_centers = _py2java(sc, center_vectors)
    jvm_model_class = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel
    jvm_model_class(jvm_centers).save(sc._jsc.sc(), path)
示例10: computeCost
def computeCost(self, rdd):
    """
    Return the K-means cost (sum of squared distances of points to
    their nearest center) for this model on the given data.

    Both the points in `rdd` and the model centers are converted with
    `_convert_to_vector` before the "computeCostKmeansModel" MLlib
    function is called.
    """
    points = rdd.map(_convert_to_vector)
    centers = list(map(_convert_to_vector, self.centers))
    return callMLlibFunc("computeCostKmeansModel", points, centers)
示例11: test_serialize
def test_serialize(self):
    """
    Check that a 4x1 scipy sparse matrix, in several storage formats,
    converts to the expected SparseVector both directly and after a
    dumps/loads round trip through `ser`.
    """
    from scipy.sparse import lil_matrix

    matrix = lil_matrix((4, 1))
    matrix[1, 0] = 1
    matrix[3, 0] = 2
    expected = SparseVector(4, {1: 1, 3: 2})

    def round_trip(value):
        return ser.loads(ser.dumps(_convert_to_vector(value)))

    # Direct conversion: the LIL matrix itself, then each other format.
    self.assertEqual(expected, _convert_to_vector(matrix))
    for converter in ("tocsc", "tocoo", "tocsr", "todok"):
        self.assertEqual(expected, _convert_to_vector(getattr(matrix, converter)()))

    # Round trip: the COO format is not exercised here.
    self.assertEqual(expected, round_trip(matrix))
    for converter in ("tocsc", "tocsr", "todok"):
        self.assertEqual(expected, round_trip(getattr(matrix, converter)()))
示例12: predict
def predict(self, x):
    """
    Predict the value of the dependent variable given a vector or
    an RDD of vectors containing values for the independent variables.

    An RDD input is handled by mapping this method over its elements;
    a single input is converted with `_convert_to_vector` and scored as
    the dot product with the weights plus the intercept.
    """
    if not isinstance(x, RDD):
        features = _convert_to_vector(x)
        return self.weights.dot(features) + self.intercept
    return x.map(self.predict)
示例13: setInitialWeights
def setInitialWeights(self, initialWeights):
    """
    Set the initial value of weights.

    This must be set before running trainOn and predictOn.

    The weights are converted with `_convert_to_vector` and stored in a
    fresh `LinearRegressionModel` whose second argument is 0.

    :return: This object, to allow call chaining.
    """
    weights_vector = _convert_to_vector(initialWeights)
    model = LinearRegressionModel(weights_vector, 0)
    self._model = model
    return self
示例14: predict
def predict(self, x):
    """
    Return the most likely class for a data vector
    or an RDD of vectors.

    For a single vector, the score of each class is `pi` plus the dot
    product of the vector with the transposed `theta`; the label at the
    position of the highest score is returned.
    """
    if isinstance(x, RDD):
        return x.map(lambda v: self.predict(v))
    features = _convert_to_vector(x)
    class_scores = self.pi + features.dot(self.theta.transpose())
    best_index = numpy.argmax(class_scores)
    return self.labels[best_index]
示例15: predict_all
def predict_all(self, x):
    """
    Return every class label paired with its score, highest score first.

    An RDD input is handled by mapping this method over its elements.
    For a single vector, the per-class values `pi + x . theta^T` are
    passed through `scale`, paired with the labels cast to `int`, and
    sorted in descending order of the scaled value.
    """
    if isinstance(x, RDD):
        return x.map(lambda v: self.predict_all(v))
    features = _convert_to_vector(x)
    raw_scores = self.pi + features.dot(self.theta.transpose())
    scaled_scores = scale(raw_scores)
    int_labels = [int(label) for label in self.labels]
    pairs = zip(int_labels, scaled_scores)
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)