This article collects typical usage examples of the pyspark.sql.column._to_java_column function in Python. If you are unsure what _to_java_column does, how to call it, or what it looks like in real code, the curated examples below should help.
The following presents 15 code examples of _to_java_column, sorted by popularity by default.
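All of the examples follow the same pattern: _to_java_column turns a Python Column (or a plain column name) into a py4j Java column object, a JVM-side function is invoked on it through sc._jvm, and the resulting Java column is wrapped back into a Python Column. A minimal sketch of that pattern is shown below; the wrapped JVM function (upper) and the wrapper name are illustrative only, and the imports shown are the ones the examples implicitly assume.

from pyspark import SparkContext
from pyspark.sql.column import Column, _to_java_column

def upper_wrapper(col):
    # Illustrative wrapper: any JVM function that takes a Column follows this pattern.
    sc = SparkContext._active_spark_context
    jc = sc._jvm.functions.upper(_to_java_column(col))  # accepts a Column or a column name
    return Column(jc)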
Example 1: datediff

def datediff(end, start):
    """
    Returns the number of days from `start` to `end`.

    >>> df = sqlContext.createDataFrame([('2015-04-08', '2015-05-10')], ['d1', 'd2'])
    >>> df.select(datediff(df.d2, df.d1).alias('diff')).collect()
    [Row(diff=32)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start)))
Example 2: levenshtein

def levenshtein(left, right):
    """Computes the Levenshtein distance of the two given strings.

    >>> df0 = sqlContext.createDataFrame([('kitten', 'sitting',)], ['l', 'r'])
    >>> df0.select(levenshtein('l', 'r').alias('d')).collect()
    [Row(d=3)]
    """
    sc = SparkContext._active_spark_context
    jc = sc._jvm.functions.levenshtein(_to_java_column(left), _to_java_column(right))
    return Column(jc)
Example 3: months_between

def months_between(date1, date2):
    """
    Returns the number of months between date1 and date2.

    >>> df = sqlContext.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['t', 'd'])
    >>> df.select(months_between(df.t, df.d).alias('months')).collect()
    [Row(months=3.9495967...)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.months_between(_to_java_column(date1), _to_java_column(date2)))
Example 4: approxCountDistinct

def approxCountDistinct(col, rsd=None):
    """Returns a new :class:`Column` for the approximate distinct count of ``col``.

    >>> df.agg(approxCountDistinct(df.age).alias('c')).collect()
    [Row(c=2)]
    """
    sc = SparkContext._active_spark_context
    if rsd is None:
        jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col))
    else:
        jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col), rsd)
    return Column(jc)
Example 5: _

def _(*cols):
    # Look up the JVM object that holds the UDF definition.
    jcontainer = self.get_java_container(package_name=package_name, object_name=object_name, java_class_instance=java_class_instance)
    # Resolve the named function on the container and instantiate the Java UDF.
    function = getattr(jcontainer, name)
    judf = function()
    # Convert each argument to a Java column (every argument must be a Column or a
    # column name), pack them into a Scala Seq, and apply the Java UDF.
    jc = judf.apply(self.to_scala_seq([_to_java_column(c) for c in cols]))
    return Column(jc)
Example 6: to_avro

def to_avro(data):
    """
    Converts a column into binary of avro format.

    Note: Avro support is built in since Spark 2.4 but is shipped as an external data source
    module. Please deploy the application as per the deployment section of the
    "Apache Avro Data Source Guide".

    :param data: the data column.

    >>> from pyspark.sql import Row
    >>> from pyspark.sql.avro.functions import to_avro
    >>> data = [(1, Row(name='Alice', age=2))]
    >>> df = spark.createDataFrame(data, ("key", "value"))
    >>> df.select(to_avro(df.value).alias("avro")).collect()
    [Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))]
    """
    sc = SparkContext._active_spark_context
    try:
        jc = sc._jvm.org.apache.spark.sql.avro.functions.to_avro(_to_java_column(data))
    except TypeError as e:
        if str(e) == "'JavaPackage' object is not callable":
            _print_missing_jar("Avro", "avro", "avro", sc.version)
        raise
    return Column(jc)
Example 7: decode

def decode(col, charset):
    """
    Decodes the first argument from a binary into a string using the provided character set
    (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16').
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.decode(_to_java_column(col), charset))
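A quick usage sketch for decode, in the style of the other examples; the sample binary data and the sqlContext are assumptions, and the commented result shows the expected output rather than a verified one.

df = sqlContext.createDataFrame([(bytearray(b'abcd'),)], ['a'])
df.select(decode('a', 'UTF-8').alias('s')).collect()
# expected: [Row(s=u'abcd')]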
Example 8: shiftLeft

def shiftLeft(col, numBits):
    """Shift the given value numBits left.

    >>> sqlContext.createDataFrame([(21,)], ['a']).select(shiftLeft('a', 1).alias('r')).collect()
    [Row(r=42)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.shiftLeft(_to_java_column(col), numBits))
Example 9: initcap

def initcap(col):
    """Translates the first letter of each word in the sentence to upper case.

    >>> sqlContext.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect()
    [Row(v=u'Ab Cd')]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.initcap(_to_java_column(col)))
Example 10: log2

def log2(col):
    """Returns the base-2 logarithm of the argument.

    >>> sqlContext.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect()
    [Row(log2=2.0)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.log2(_to_java_column(col)))
Example 11: log

def log(arg1, arg2=None):
    """Returns the logarithm of the second argument in the base given by the first argument.
    If there is only one argument, it returns the natural logarithm of that argument.

    >>> df.select(log(10.0, df.age).alias('ten')).map(lambda l: str(l.ten)[:7]).collect()
    ['0.30102', '0.69897']

    >>> df.select(log(df.age).alias('e')).map(lambda l: str(l.e)[:7]).collect()
    ['0.69314', '1.60943']
    """
    sc = SparkContext._active_spark_context
    if arg2 is None:
        jc = sc._jvm.functions.log(_to_java_column(arg1))
    else:
        jc = sc._jvm.functions.log(arg1, _to_java_column(arg2))
    return Column(jc)
Example 12: length

def length(col):
    """Calculates the length of a string or binary expression.

    >>> sqlContext.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
    [Row(length=3)]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.length(_to_java_column(col)))
Example 13: from_unixtime

def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"):
    """
    Converts the number of seconds since the unix epoch (1970-01-01 00:00:00 UTC) to a string
    representing the timestamp of that moment in the current system time zone, in the given
    format.
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format))
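A usage sketch for from_unixtime; the epoch value is illustrative, and the exact output string depends on the session's system time zone, so the commented result is only indicative.

df = sqlContext.createDataFrame([(1428476400,)], ['t'])
df.select(from_unixtime('t', 'yyyy-MM-dd').alias('d')).collect()
# e.g. [Row(d=u'2015-04-08')], depending on the system time zone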
Example 14: unhex

def unhex(col):
    """Inverse of hex. Interprets each pair of characters as a hexadecimal number
    and converts it to the byte representation of the number.

    >>> sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
    [Row(unhex(a)=bytearray(b'ABC'))]
    """
    sc = SparkContext._active_spark_context
    return Column(sc._jvm.functions.unhex(_to_java_column(col)))
Example 15: _convertDF

def _convertDF(df, sp_key=None, metadata=None):
    # Grab the RasterFrames context attached to the active SparkContext.
    ctx = SparkContext._active_spark_context._rf_context
    if sp_key is None:
        # No spatial key given: convert the DataFrame directly.
        return RasterFrame(ctx._jrfctx.asRF(df._jdf), ctx._spark_session)
    else:
        import json
        # Pass the spatial-key column and the metadata (as a JSON string) to the JVM side.
        return RasterFrame(ctx._jrfctx.asRF(
            df._jdf, _to_java_column(sp_key), json.dumps(metadata)), ctx._spark_session)
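A hypothetical call sketch for _convertDF, based only on its signature; the spatial-key column name and the metadata dictionary are made up for illustration, and a RasterFrames-enabled Spark session is assumed.

# Without a spatial key: wrap the DataFrame as-is.
rf = _convertDF(df)
# With a spatial-key column and layer metadata (serialized to JSON internally).
rf = _convertDF(df, sp_key=df.spatial_key, metadata={'crs': 'EPSG:4326'})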