本文整理匯總了Python中pyspark.mllib.common.JavaModelWrapper類的典型用法代碼示例。如果您正苦於以下問題:Python JavaModelWrapper類的具體用法?Python JavaModelWrapper怎麽用?Python JavaModelWrapper使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了JavaModelWrapper類的8個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: __init__
def __init__(self, rows, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java RowMatrix.
Publicly, we require that `rows` be an RDD. However, for
internal usage, `rows` can also be a Java RowMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
>>> mat = RowMatrix(rows)
>>> mat_diff = RowMatrix(rows)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(rows, RDD):
rows = rows.map(_convert_to_vector)
java_matrix = callMLlibFunc("createRowMatrix", rows, long(numRows), int(numCols))
elif (isinstance(rows, JavaObject)
and rows.getClass().getSimpleName() == "RowMatrix"):
java_matrix = rows
else:
raise TypeError("rows should be an RDD of vectors, got %s" % type(rows))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
示例2: tallSkinnyQR
def tallSkinnyQR(self, computeQ=False):
"""
Compute the QR decomposition of this RowMatrix.
The implementation is designed to optimize the QR decomposition
(factorization) for the RowMatrix of a tall and skinny shape.
Reference:
Paul G. Constantine, David F. Gleich. "Tall and skinny QR
factorizations in MapReduce architectures"
([[http://dx.doi.org/10.1145/1996092.1996103]])
:param: computeQ: whether to computeQ
:return: QRDecomposition(Q: RowMatrix, R: Matrix), where
Q = None if computeQ = false.
>>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]])
>>> mat = RowMatrix(rows)
>>> decomp = mat.tallSkinnyQR(True)
>>> Q = decomp.Q
>>> R = decomp.R
>>> # Test with absolute values
>>> absQRows = Q.rows.map(lambda row: abs(row.toArray()).tolist())
>>> absQRows.collect()
[[0.6..., 0.0], [0.8..., 0.0], [0.0, 1.0]]
>>> # Test with absolute values
>>> abs(R.toArray()).tolist()
[[5.0, 10.0], [0.0, 1.0]]
"""
decomp = JavaModelWrapper(self._java_matrix_wrapper.call("tallSkinnyQR", computeQ))
if computeQ:
java_Q = decomp.call("Q")
Q = RowMatrix(java_Q)
else:
Q = None
R = decomp.call("R")
return QRDecomposition(Q, R)
示例3: BlockMatrix
class BlockMatrix(DistributedMatrix):
"""
Represents a distributed matrix in blocks of local matrices.
:param blocks: An RDD of sub-matrix blocks
((blockRowIndex, blockColIndex), sub-matrix) that
form this distributed matrix. If multiple blocks
with the same index exist, the results for
operations like add and multiply will be
unpredictable.
:param rowsPerBlock: Number of rows that make up each block.
The blocks forming the final rows are not
required to have the given number of rows.
:param colsPerBlock: Number of columns that make up each block.
The blocks forming the final columns are not
required to have the given number of columns.
:param numRows: Number of rows of this matrix. If the supplied
value is less than or equal to zero, the number
of rows will be calculated when `numRows` is
invoked.
:param numCols: Number of columns of this matrix. If the supplied
value is less than or equal to zero, the number
of columns will be calculated when `numCols` is
invoked.
"""
def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java BlockMatrix.
Publicly, we require that `blocks` be an RDD. However, for
internal usage, `blocks` can also be a Java BlockMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
>>> mat = BlockMatrix(blocks, 3, 2)
>>> mat_diff = BlockMatrix(blocks, 3, 2)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = BlockMatrix(mat._java_matrix_wrapper._java_model, 3, 2)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(blocks, RDD):
blocks = blocks.map(_convert_to_matrix_block_tuple)
# We use DataFrames for serialization of sub-matrix blocks
# from Python, so first convert the RDD to a DataFrame on
# this side. This will convert each sub-matrix block
# tuple to a Row containing the 'blockRowIndex',
# 'blockColIndex', and 'subMatrix' values, which can
# each be easily serialized. We will convert back to
# ((blockRowIndex, blockColIndex), sub-matrix) tuples on
# the Scala side.
java_matrix = callMLlibFunc("createBlockMatrix", blocks.toDF(),
int(rowsPerBlock), int(colsPerBlock),
long(numRows), long(numCols))
elif (isinstance(blocks, JavaObject)
and blocks.getClass().getSimpleName() == "BlockMatrix"):
java_matrix = blocks
else:
raise TypeError("blocks should be an RDD of sub-matrix blocks as "
"((int, int), matrix) tuples, got %s" % type(blocks))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def blocks(self):
"""
The RDD of sub-matrix blocks
((blockRowIndex, blockColIndex), sub-matrix) that form this
distributed matrix.
>>> mat = BlockMatrix(
... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2)
>>> blocks = mat.blocks
>>> blocks.first()
((0, 0), DenseMatrix(3, 2, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0))
"""
# We use DataFrames for serialization of sub-matrix blocks
# from Java, so we first convert the RDD of blocks to a
# DataFrame on the Scala/Java side. Then we map each Row in
# the DataFrame back to a sub-matrix block on this side.
blocks_df = callMLlibFunc("getMatrixBlocks", self._java_matrix_wrapper._java_model)
blocks = blocks_df.rdd.map(lambda row: ((row[0][0], row[0][1]), row[1]))
return blocks
@property
def rowsPerBlock(self):
"""
Number of rows that make up each block.
#.........這裏部分代碼省略.........
示例4: CoordinateMatrix
class CoordinateMatrix(DistributedMatrix):
"""
Represents a matrix in coordinate format.
:param entries: An RDD of MatrixEntry inputs or
(long, long, float) tuples.
:param numRows: Number of rows in the matrix. A non-positive
value means unknown, at which point the number
of rows will be determined by the max row
index plus one.
:param numCols: Number of columns in the matrix. A non-positive
value means unknown, at which point the number
of columns will be determined by the max row
index plus one.
"""
def __init__(self, entries, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java CoordinateMatrix.
Publicly, we require that `rows` be an RDD. However, for
internal usage, `rows` can also be a Java CoordinateMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
... MatrixEntry(6, 4, 2.1)])
>>> mat = CoordinateMatrix(entries)
>>> mat_diff = CoordinateMatrix(entries)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = CoordinateMatrix(mat._java_matrix_wrapper._java_model)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(entries, RDD):
entries = entries.map(_convert_to_matrix_entry)
# We use DataFrames for serialization of MatrixEntry entries
# from Python, so first convert the RDD to a DataFrame on
# this side. This will convert each MatrixEntry to a Row
# containing the 'i', 'j', and 'value' values, which can
# each be easily serialized. We will convert back to
# MatrixEntry inputs on the Scala side.
java_matrix = callMLlibFunc("createCoordinateMatrix", entries.toDF(),
long(numRows), long(numCols))
elif (isinstance(entries, JavaObject)
and entries.getClass().getSimpleName() == "CoordinateMatrix"):
java_matrix = entries
else:
raise TypeError("entries should be an RDD of MatrixEntry entries or "
"(long, long, float) tuples, got %s" % type(entries))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def entries(self):
"""
Entries of the CoordinateMatrix stored as an RDD of
MatrixEntries.
>>> mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2),
... MatrixEntry(6, 4, 2.1)]))
>>> entries = mat.entries
>>> entries.first()
MatrixEntry(0, 0, 1.2)
"""
# We use DataFrames for serialization of MatrixEntry entries
# from Java, so we first convert the RDD of entries to a
# DataFrame on the Scala/Java side. Then we map each Row in
# the DataFrame back to a MatrixEntry on this side.
entries_df = callMLlibFunc("getMatrixEntries", self._java_matrix_wrapper._java_model)
entries = entries_df.rdd.map(lambda row: MatrixEntry(row[0], row[1], row[2]))
return entries
def numRows(self):
"""
Get or compute the number of rows.
>>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2),
... MatrixEntry(1, 0, 2),
... MatrixEntry(2, 1, 3.7)])
>>> mat = CoordinateMatrix(entries)
>>> print(mat.numRows())
3
>>> mat = CoordinateMatrix(entries, 7, 6)
>>> print(mat.numRows())
7
"""
return self._java_matrix_wrapper.call("numRows")
def numCols(self):
"""
Get or compute the number of cols.
#.........這裏部分代碼省略.........
示例5: RowMatrix
class RowMatrix(DistributedMatrix):
"""
Represents a row-oriented distributed Matrix with no meaningful
row indices.
:param rows: An RDD of vectors.
:param numRows: Number of rows in the matrix. A non-positive
value means unknown, at which point the number
of rows will be determined by the number of
records in the `rows` RDD.
:param numCols: Number of columns in the matrix. A non-positive
value means unknown, at which point the number
of columns will be determined by the size of
the first row.
"""
def __init__(self, rows, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java RowMatrix.
Publicly, we require that `rows` be an RDD. However, for
internal usage, `rows` can also be a Java RowMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
>>> mat = RowMatrix(rows)
>>> mat_diff = RowMatrix(rows)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(rows, RDD):
rows = rows.map(_convert_to_vector)
java_matrix = callMLlibFunc("createRowMatrix", rows, long(numRows), int(numCols))
elif (isinstance(rows, JavaObject)
and rows.getClass().getSimpleName() == "RowMatrix"):
java_matrix = rows
else:
raise TypeError("rows should be an RDD of vectors, got %s" % type(rows))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def rows(self):
"""
Rows of the RowMatrix stored as an RDD of vectors.
>>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]]))
>>> rows = mat.rows
>>> rows.first()
DenseVector([1.0, 2.0, 3.0])
"""
return self._java_matrix_wrapper.call("rows")
def numRows(self):
"""
Get or compute the number of rows.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
... [7, 8, 9], [10, 11, 12]])
>>> mat = RowMatrix(rows)
>>> print(mat.numRows())
4
>>> mat = RowMatrix(rows, 7, 6)
>>> print(mat.numRows())
7
"""
return self._java_matrix_wrapper.call("numRows")
def numCols(self):
"""
Get or compute the number of cols.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
... [7, 8, 9], [10, 11, 12]])
>>> mat = RowMatrix(rows)
>>> print(mat.numCols())
3
>>> mat = RowMatrix(rows, 7, 6)
>>> print(mat.numCols())
6
"""
return self._java_matrix_wrapper.call("numCols")
@since('2.0.0')
def computeColumnSummaryStatistics(self):
"""
Computes column-wise summary statistics.
#.........這裏部分代碼省略.........
示例6: IndexedRowMatrix
class IndexedRowMatrix(DistributedMatrix):
"""
Represents a row-oriented distributed Matrix with indexed rows.
:param rows: An RDD of IndexedRows or (long, vector) tuples.
:param numRows: Number of rows in the matrix. A non-positive
value means unknown, at which point the number
of rows will be determined by the max row
index plus one.
:param numCols: Number of columns in the matrix. A non-positive
value means unknown, at which point the number
of columns will be determined by the size of
the first row.
"""
def __init__(self, rows, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java IndexedRowMatrix.
Publicly, we require that `rows` be an RDD. However, for
internal usage, `rows` can also be a Java IndexedRowMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
... IndexedRow(1, [4, 5, 6])])
>>> mat = IndexedRowMatrix(rows)
>>> mat_diff = IndexedRowMatrix(rows)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = IndexedRowMatrix(mat._java_matrix_wrapper._java_model)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(rows, RDD):
rows = rows.map(_convert_to_indexed_row)
# We use DataFrames for serialization of IndexedRows from
# Python, so first convert the RDD to a DataFrame on this
# side. This will convert each IndexedRow to a Row
# containing the 'index' and 'vector' values, which can
# both be easily serialized. We will convert back to
# IndexedRows on the Scala side.
java_matrix = callMLlibFunc("createIndexedRowMatrix", rows.toDF(),
long(numRows), int(numCols))
elif (isinstance(rows, JavaObject)
and rows.getClass().getSimpleName() == "IndexedRowMatrix"):
java_matrix = rows
else:
raise TypeError("rows should be an RDD of IndexedRows or (long, vector) tuples, "
"got %s" % type(rows))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def rows(self):
"""
Rows of the IndexedRowMatrix stored as an RDD of IndexedRows.
>>> mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]),
... IndexedRow(1, [4, 5, 6])]))
>>> rows = mat.rows
>>> rows.first()
IndexedRow(0, [1.0,2.0,3.0])
"""
# We use DataFrames for serialization of IndexedRows from
# Java, so we first convert the RDD of rows to a DataFrame
# on the Scala/Java side. Then we map each Row in the
# DataFrame back to an IndexedRow on this side.
rows_df = callMLlibFunc("getIndexedRows", self._java_matrix_wrapper._java_model)
rows = rows_df.rdd.map(lambda row: IndexedRow(row[0], row[1]))
return rows
def numRows(self):
"""
Get or compute the number of rows.
>>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]),
... IndexedRow(1, [4, 5, 6]),
... IndexedRow(2, [7, 8, 9]),
... IndexedRow(3, [10, 11, 12])])
>>> mat = IndexedRowMatrix(rows)
>>> print(mat.numRows())
4
>>> mat = IndexedRowMatrix(rows, 7, 6)
>>> print(mat.numRows())
7
"""
return self._java_matrix_wrapper.call("numRows")
def numCols(self):
"""
Get or compute the number of cols.
#.........這裏部分代碼省略.........
示例7: RowMatrix
class RowMatrix(DistributedMatrix):
"""
.. note:: Experimental
Represents a row-oriented distributed Matrix with no meaningful
row indices.
:param rows: An RDD of vectors.
:param numRows: Number of rows in the matrix. A non-positive
value means unknown, at which point the number
of rows will be determined by the number of
records in the `rows` RDD.
:param numCols: Number of columns in the matrix. A non-positive
value means unknown, at which point the number
of columns will be determined by the size of
the first row.
"""
def __init__(self, rows, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java RowMatrix.
Publicly, we require that `rows` be an RDD. However, for
internal usage, `rows` can also be a Java RowMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]])
>>> mat = RowMatrix(rows)
>>> mat_diff = RowMatrix(rows)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = RowMatrix(mat._java_matrix_wrapper._java_model)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(rows, RDD):
rows = rows.map(_convert_to_vector)
java_matrix = callMLlibFunc("createRowMatrix", rows, long(numRows), int(numCols))
elif (isinstance(rows, JavaObject)
and rows.getClass().getSimpleName() == "RowMatrix"):
java_matrix = rows
else:
raise TypeError("rows should be an RDD of vectors, got %s" % type(rows))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def rows(self):
"""
Rows of the RowMatrix stored as an RDD of vectors.
>>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]]))
>>> rows = mat.rows
>>> rows.first()
DenseVector([1.0, 2.0, 3.0])
"""
return self._java_matrix_wrapper.call("rows")
def numRows(self):
"""
Get or compute the number of rows.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
... [7, 8, 9], [10, 11, 12]])
>>> mat = RowMatrix(rows)
>>> print(mat.numRows())
4
>>> mat = RowMatrix(rows, 7, 6)
>>> print(mat.numRows())
7
"""
return self._java_matrix_wrapper.call("numRows")
def numCols(self):
"""
Get or compute the number of cols.
>>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6],
... [7, 8, 9], [10, 11, 12]])
>>> mat = RowMatrix(rows)
>>> print(mat.numCols())
3
>>> mat = RowMatrix(rows, 7, 6)
>>> print(mat.numCols())
6
"""
return self._java_matrix_wrapper.call("numCols")
示例8: BlockMatrix
class BlockMatrix(DistributedMatrix):
"""
.. note:: Experimental
Represents a distributed matrix in blocks of local matrices.
:param blocks: An RDD of sub-matrix blocks
((blockRowIndex, blockColIndex), sub-matrix) that
form this distributed matrix. If multiple blocks
with the same index exist, the results for
operations like add and multiply will be
unpredictable.
:param rowsPerBlock: Number of rows that make up each block.
The blocks forming the final rows are not
required to have the given number of rows.
:param colsPerBlock: Number of columns that make up each block.
The blocks forming the final columns are not
required to have the given number of columns.
:param numRows: Number of rows of this matrix. If the supplied
value is less than or equal to zero, the number
of rows will be calculated when `numRows` is
invoked.
:param numCols: Number of columns of this matrix. If the supplied
value is less than or equal to zero, the number
of columns will be calculated when `numCols` is
invoked.
"""
def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0):
"""
Note: This docstring is not shown publicly.
Create a wrapper over a Java BlockMatrix.
Publicly, we require that `blocks` be an RDD. However, for
internal usage, `blocks` can also be a Java BlockMatrix
object, in which case we can wrap it directly. This
assists in clean matrix conversions.
>>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))])
>>> mat = BlockMatrix(blocks, 3, 2)
>>> mat_diff = BlockMatrix(blocks, 3, 2)
>>> (mat_diff._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
False
>>> mat_same = BlockMatrix(mat._java_matrix_wrapper._java_model, 3, 2)
>>> (mat_same._java_matrix_wrapper._java_model ==
... mat._java_matrix_wrapper._java_model)
True
"""
if isinstance(blocks, RDD):
blocks = blocks.map(_convert_to_matrix_block_tuple)
# We use DataFrames for serialization of sub-matrix blocks
# from Python, so first convert the RDD to a DataFrame on
# this side. This will convert each sub-matrix block
# tuple to a Row containing the 'blockRowIndex',
# 'blockColIndex', and 'subMatrix' values, which can
# each be easily serialized. We will convert back to
# ((blockRowIndex, blockColIndex), sub-matrix) tuples on
# the Scala side.
java_matrix = callMLlibFunc("createBlockMatrix", blocks.toDF(),
int(rowsPerBlock), int(colsPerBlock),
long(numRows), long(numCols))
elif (isinstance(blocks, JavaObject)
and blocks.getClass().getSimpleName() == "BlockMatrix"):
java_matrix = blocks
else:
raise TypeError("blocks should be an RDD of sub-matrix blocks as "
"((int, int), matrix) tuples, got %s" % type(blocks))
self._java_matrix_wrapper = JavaModelWrapper(java_matrix)
@property
def blocks(self):
"""
The RDD of sub-matrix blocks
((blockRowIndex, blockColIndex), sub-matrix) that form this
distributed matrix.
>>> mat = BlockMatrix(
... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])),
... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2)
>>> blocks = mat.blocks
>>> blocks.first()
((0, 0), DenseMatrix(3, 2, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 0))
"""
# We use DataFrames for serialization of sub-matrix blocks
# from Java, so we first convert the RDD of blocks to a
# DataFrame on the Scala/Java side. Then we map each Row in
# the DataFrame back to a sub-matrix block on this side.
blocks_df = callMLlibFunc("getMatrixBlocks", self._java_matrix_wrapper._java_model)
blocks = blocks_df.map(lambda row: ((row[0][0], row[0][1]), row[1]))
return blocks
@property
def rowsPerBlock(self):
"""
#.........這裏部分代碼省略.........