本文整理汇总了Python中pyspark.sql.utils.toJArray函数的典型用法代码示例。如果您正苦于以下问题:Python toJArray函数的具体用法?Python toJArray怎么用?Python toJArray使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了toJArray函数的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load
def load(self, path=None, format=None, schema=None, **options):
    """Loads data from a data source and returns it as a :class:`DataFrame`.

    :param path: optional string, or list/tuple of strings, for file-system
        backed data sources.
    :param format: optional string for format of the data source. Default to 'parquet'.
    :param schema: optional :class:`StructType` for the input schema.
    :param options: all other string options
    :return: the result of the underlying reader's ``load`` wrapped by ``self._df``

    >>> df = sqlContext.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
    ...     opt2=1, opt3='str')
    >>> df.dtypes
    [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]

    >>> df = sqlContext.read.format('json').load(['python/test_support/sql/people.json',
    ...     'python/test_support/sql/people1.json'])
    >>> df.dtypes
    [('age', 'bigint'), ('aka', 'string'), ('name', 'string')]
    """
    # Apply the reader configuration before touching the data source.
    if format is not None:
        self.format(format)
    if schema is not None:
        self.schema(schema)
    self.options(**options)

    if path is None:
        return self._df(self._jreader.load())

    # isinstance (rather than an exact type comparison) also accepts tuples
    # and list subclasses; several paths are handed over as a Java String array.
    if isinstance(path, (list, tuple)):
        gateway = self._sqlContext._sc._gateway
        jpaths = utils.toJArray(gateway, gateway.jvm.java.lang.String, path)
        return self._df(self._jreader.load(jpaths))

    return self._df(self._jreader.load(path))
示例2: aggregate_properties
def aggregate_properties(self, app_name, entity_type, channel_name=None,
                         start_time=None, until_time=None, required=None):
    """Call the JVM ``PPythonEventStore.aggregateProperties`` and wrap the result.

    :param app_name: forwarded unchanged to the JVM call
    :param entity_type: forwarded unchanged to the JVM call
    :param channel_name: optional; forwarded unchanged to the JVM call
    :param start_time: optional; forwarded unchanged to the JVM call
    :param until_time: optional; forwarded unchanged to the JVM call
    :param required: optional sequence of strings, converted to a Java
        ``String`` array; ``None`` is passed through as ``None``
    :return: a :class:`DataFrame` built from the JVM result and ``self.sql_ctx``
    """
    gateway = self._sc._gateway

    # ``required`` defaults to None, which must not be handed to toJArray.
    # NOTE(review): toJArray presumably needs a sized sequence, and the JVM
    # side is assumed to accept null for "no required properties" -- confirm.
    if required is None:
        jrequired = None
    else:
        jrequired = utils.toJArray(gateway, gateway.jvm.String, required)

    pes = self._sc._jvm.org.apache.predictionio.data.store.python.PPythonEventStore
    jdf = pes.aggregateProperties(app_name, entity_type, channel_name,
                                  start_time, until_time,
                                  jrequired,
                                  self._jss)
    return DataFrame(jdf, self.sql_ctx)
示例3: jdbc
def jdbc(
    self,
    url,
    table,
    column=None,
    lowerBound=None,
    upperBound=None,
    numPartitions=None,
    predicates=None,
    properties=None,
):
    """
    Construct a :class:`DataFrame` representing the database table named ``table``
    accessible via JDBC URL ``url`` and connection ``properties``.

    Partitions of the table will be retrieved in parallel if either ``column`` or
    ``predicates`` is specified.

    If both ``column`` and ``predicates`` are specified, ``column`` will be used.

    .. note:: Don't create too many partitions in parallel on a large cluster;
        otherwise Spark might crash your external database systems.

    :param url: a JDBC URL of the form ``jdbc:subprotocol:subname``
    :param table: the name of the table
    :param column: the name of an integer column that will be used for partitioning;
                   if this parameter is specified, then ``numPartitions``, ``lowerBound``
                   (inclusive), and ``upperBound`` (exclusive) will form partition strides
                   for generated WHERE clause expressions used to split the column
                   ``column`` evenly
    :param lowerBound: the minimum value of ``column`` used to decide partition stride;
                       required when ``column`` is given
    :param upperBound: the maximum value of ``column`` used to decide partition stride;
                       required when ``column`` is given
    :param numPartitions: the number of partitions; when ``column`` is given and this
                          is omitted, the Spark context's ``defaultParallelism`` is used
    :param predicates: a list of expressions suitable for inclusion in WHERE clauses;
                       each one defines one partition of the :class:`DataFrame`
    :param properties: a dictionary of JDBC database connection arguments. Normally at
                       least properties "user" and "password" with their corresponding values.
                       For example { 'user' : 'SYSTEM', 'password' : 'mypassword' }
    :return: a DataFrame
    :raises ValueError: if ``column`` is given without both ``lowerBound`` and
                        ``upperBound``
    """
    if properties is None:
        properties = {}

    # Copy the Python dict into a java.util.Properties instance for the JVM reader.
    jprop = JavaClass("java.util.Properties", self._spark._sc._gateway._gateway_client)()
    for key, value in properties.items():
        jprop.setProperty(key, value)

    if column is not None:
        # Fail with a clear message instead of the opaque TypeError from int(None).
        if lowerBound is None or upperBound is None:
            raise ValueError(
                "lowerBound and upperBound must both be given when column is specified"
            )
        if numPartitions is None:
            numPartitions = self._spark._sc.defaultParallelism
        return self._df(
            self._jreader.jdbc(
                url, table, column, int(lowerBound), int(upperBound), int(numPartitions), jprop
            )
        )

    if predicates is not None:
        gateway = self._spark._sc._gateway
        jpredicates = utils.toJArray(gateway, gateway.jvm.java.lang.String, predicates)
        return self._df(self._jreader.jdbc(url, table, jpredicates, jprop))

    return self._df(self._jreader.jdbc(url, table, jprop))
示例4: jdbc
def jdbc(
    self,
    url,
    table,
    column=None,
    lowerBound=None,
    upperBound=None,
    numPartitions=None,
    predicates=None,
    properties=None,
):
    """
    Construct a :class:`DataFrame` representing the database table accessible
    via JDBC URL `url` named `table` and connection `properties`.

    The `column` parameter could be used to partition the table, then it will
    be retrieved in parallel based on the parameters passed to this function.

    The `predicates` parameter gives a list of expressions suitable for inclusion
    in WHERE clauses; each one defines one partition of the :class:`DataFrame`.

    If both `column` and `predicates` are given, `column` takes precedence.

    .. note:: Don't create too many partitions in parallel on a large cluster;
        otherwise Spark might crash your external database systems.

    :param url: a JDBC URL
    :param table: name of table
    :param column: the column used to partition
    :param lowerBound: the lower bound of partition column; required when
                       `column` is given
    :param upperBound: the upper bound of the partition column; required when
                       `column` is given
    :param numPartitions: the number of partitions; when `column` is given and
                          this is omitted, the context's ``defaultParallelism``
                          is used
    :param predicates: a list of expressions
    :param properties: JDBC database connection arguments, a dictionary of
                       arbitrary string tag/value. Normally at least a "user"
                       and "password" property should be included.
    :return: a DataFrame
    :raises ValueError: if `column` is given without both `lowerBound` and
                        `upperBound`
    """
    if properties is None:
        properties = {}

    # Copy the Python dict into a java.util.Properties instance for the JVM reader.
    jprop = JavaClass("java.util.Properties", self._sqlContext._sc._gateway._gateway_client)()
    for key, value in properties.items():
        jprop.setProperty(key, value)

    if column is not None:
        # Fail with a clear message instead of the opaque TypeError from int(None).
        if lowerBound is None or upperBound is None:
            raise ValueError(
                "lowerBound and upperBound must both be given when column is specified"
            )
        if numPartitions is None:
            numPartitions = self._sqlContext._sc.defaultParallelism
        return self._df(
            self._jreader.jdbc(
                url, table, column, int(lowerBound), int(upperBound), int(numPartitions), jprop
            )
        )

    if predicates is not None:
        gateway = self._sqlContext._sc._gateway
        jpredicates = utils.toJArray(gateway, gateway.jvm.java.lang.String, predicates)
        return self._df(self._jreader.jdbc(url, table, jpredicates, jprop))

    return self._df(self._jreader.jdbc(url, table, jprop))