This article collects typical usage examples of the Python method pyspark.sql.functions.concat. If you have been wondering what functions.concat does and how to use it, the curated examples below should help; for further details, see the containing module, pyspark.sql.functions.
Seven code examples of functions.concat are shown below, ordered by popularity by default.
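Before the examples, here is a minimal, self-contained sketch of F.concat itself (sample data invented for illustration): it joins its inputs with no separator, and a single NULL input makes the whole result NULL (concat_ws is the NULL-skipping variant).

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("foo", "bar"), ("baz", None)], ["a", "b"])

# concat joins its inputs with no separator; any NULL input
# makes the whole result NULL (use concat_ws to skip NULLs).
df.select(F.concat(df.a, df.b).alias("ab")).show()
# +------+
# |    ab|
# +------+
# |foobar|
# |  null|
# +------+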
Example 1: compile_string_concat
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def compile_string_concat(t, expr, scope, **kwargs):
    op = expr.op()
    src_columns = t.translate(op.arg, scope)
    return F.concat(*src_columns)
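This compile rule comes from an ibis-style expression translator: the translated operands are splatted into F.concat. A toy sketch of the same dispatch pattern, using a hypothetical dict-based node format rather than real ibis internals:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()  # Column exprs need an active session

def translate(node):
    # Hypothetical toy translator: dispatch on the node's op name and
    # lower string_concat nodes to F.concat over the translated args.
    if node["op"] == "string_concat":
        return F.concat(*(translate(arg) for arg in node["args"]))
    if node["op"] == "column":
        return F.col(node["name"])
    raise NotImplementedError(node["op"])

expr = {"op": "string_concat",
        "args": [{"op": "column", "name": "first_name"},
                 {"op": "column", "name": "last_name"}]}
print(translate(expr))  # Column<'concat(first_name, last_name)'>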
Example 2: compile_array_concat
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def compile_array_concat(t, expr, scope, **kwargs):
    op = expr.op()
    left = t.translate(op.left, scope)
    right = t.translate(op.right, scope)
    return F.concat(left, right)
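F.concat is overloaded: since Spark 2.4 it also concatenates array columns end to end, which is what this rule relies on. A minimal sketch (sample data invented):

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([([1, 2], [3, 4])], ["left", "right"])

# On array columns, concat appends the arrays end to end.
df.select(F.concat("left", "right").alias("both")).show()
# +------------+
# |        both|
# +------------+
# |[1, 2, 3, 4]|
# +------------+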
Example 3: getData
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def getData(self):
    spk = SparkSession.builder.getOrCreate()
    dataDF = spk.read.parquet(self.input)
    providerDF = dataDF.select(
        concat(
            concat('provider_domain', 'content_path'),
            when(col('content_query_string') != '',
                 concat(lit('?'), col('content_query_string'))).otherwise(lit(''))
        ).alias('url'),
        concat('warc_segment', lit('/warc/'), 'warc_filename').alias('warc_filename'),
        'content_offset',
        'deflate_length',
    ).where(
        col('provider_domain').like('%{}'.format(self.domain))
    ).dropDuplicates(['url'])
    # Convert the DataFrame into a list of tab-delimited strings
    # (the loop variable is named `value` to avoid shadowing the imported `col`).
    providerData = providerDF.rdd.map(
        lambda row: '\t'.join(str(value) for value in row)
    ).collect()
    return providerData
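The interesting pattern here is the conditional concatenation: '?' plus the query string is appended only when the query string is non-empty. A self-contained sketch of just that pattern (column names taken from the example above, sample rows invented):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat, lit, when

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("example.org", "/a", "x=1"), ("example.org", "/b", "")],
    ["provider_domain", "content_path", "content_query_string"],
)

# Append '?query' only for rows with a non-empty query string.
df.select(
    concat(
        col("provider_domain"),
        col("content_path"),
        when(col("content_query_string") != "",
             concat(lit("?"), col("content_query_string"))).otherwise(lit("")),
    ).alias("url")
).show(truncate=False)
# example.org/a?x=1
# example.org/b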
Example 4: __add__
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def __add__(self, other):
    if isinstance(self.spark.data_type, StringType):
        # Concatenate string columns
        if isinstance(other, IndexOpsMixin) and isinstance(other.spark.data_type, StringType):
            return column_op(F.concat)(self, other)
        # Handle df['col'] + 'literal'
        elif isinstance(other, str):
            return column_op(F.concat)(self, F.lit(other))
        else:
            raise TypeError("string addition can only be applied to string series or literals.")
    else:
        return column_op(Column.__add__)(self, other)
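This is Koalas' (databricks.koalas) Series.__add__: '+' on string Series lowers to F.concat on the underlying Spark columns. A minimal usage sketch, assuming the Koalas API:

import databricks.koalas as ks

kdf = ks.DataFrame({"a": ["foo", "baz"], "b": ["bar", "qux"]})

# Series + Series and Series + literal both compile to F.concat.
print((kdf.a + kdf.b).to_list())  # ['foobar', 'bazqux']
print((kdf.a + "!").to_list())    # ['foo!', 'baz!']
# A non-string operand such as kdf.a + 1 raises
# TypeError("string addition can only be applied to string series or literals.")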
Example 5: __radd__
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def __radd__(self, other):
    # Handle 'literal' + df['col']
    if isinstance(self.spark.data_type, StringType) and isinstance(other, str):
        return self._with_new_scol(F.concat(F.lit(other), self.spark.column))
    else:
        return column_op(Column.__radd__)(self, other)
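__radd__ covers the mirrored case, a string literal on the left-hand side. A short sketch, again assuming the Koalas API:

import databricks.koalas as ks

s = ks.Series(["foo", "baz"])

# 'literal' + Series dispatches to __radd__, i.e. F.concat(lit(other), col).
print(("id_" + s).to_list())  # ['id_foo', 'id_baz']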
Example 6: add_prefix
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def add_prefix(self, prefix):
    """
    Prefix labels with string `prefix`.

    For Series, the row labels are prefixed.
    For DataFrame, the column labels are prefixed.

    Parameters
    ----------
    prefix : str
        The string to add before each label.

    Returns
    -------
    Series
        New Series with updated labels.

    See Also
    --------
    Series.add_suffix: Suffix row labels with string `suffix`.
    DataFrame.add_suffix: Suffix column labels with string `suffix`.
    DataFrame.add_prefix: Prefix column labels with string `prefix`.

    Examples
    --------
    >>> s = ks.Series([1, 2, 3, 4])
    >>> s
    0    1
    1    2
    2    3
    3    4
    Name: 0, dtype: int64

    >>> s.add_prefix('item_')
    item_0    1
    item_1    2
    item_2    3
    item_3    4
    Name: 0, dtype: int64
    """
    assert isinstance(prefix, str)
    internal = self.to_frame()._internal
    sdf = internal.spark_frame.select(
        [
            F.concat(F.lit(prefix), index_spark_column).alias(index_spark_column_name)
            for index_spark_column, index_spark_column_name in zip(
                internal.index_spark_columns, internal.index_spark_column_names
            )
        ]
        + internal.data_spark_columns
    )
    return first_series(DataFrame(internal.with_new_sdf(sdf)))
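Under the hood this is just F.concat(F.lit(prefix), index_column) applied to every index column of the backing Spark DataFrame. The same trick in plain PySpark, on a hypothetical frame with an explicit index column:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
sdf = spark.createDataFrame([(0, 1), (1, 2)], ["idx", "value"])

# Prefix the (stringified) index column, carrying data columns through.
sdf.select(
    F.concat(F.lit("item_"), sdf.idx.cast("string")).alias("idx"),
    "value",
).show()
# +------+-----+
# |   idx|value|
# +------+-----+
# |item_0|    1|
# |item_1|    2|
# +------+-----+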
Example 7: add_suffix
# Required import: from pyspark.sql import functions [as alias]
# Or: from pyspark.sql.functions import concat [as alias]
def add_suffix(self, suffix):
    """
    Suffix labels with string `suffix`.

    For Series, the row labels are suffixed.
    For DataFrame, the column labels are suffixed.

    Parameters
    ----------
    suffix : str
        The string to add after each label.

    Returns
    -------
    Series
        New Series with updated labels.

    See Also
    --------
    Series.add_prefix: Prefix row labels with string `prefix`.
    DataFrame.add_prefix: Prefix column labels with string `prefix`.
    DataFrame.add_suffix: Suffix column labels with string `suffix`.

    Examples
    --------
    >>> s = ks.Series([1, 2, 3, 4])
    >>> s
    0    1
    1    2
    2    3
    3    4
    Name: 0, dtype: int64

    >>> s.add_suffix('_item')
    0_item    1
    1_item    2
    2_item    3
    3_item    4
    Name: 0, dtype: int64
    """
    assert isinstance(suffix, str)
    internal = self.to_frame()._internal
    sdf = internal.spark_frame.select(
        [
            F.concat(index_spark_column, F.lit(suffix)).alias(index_spark_column_name)
            for index_spark_column, index_spark_column_name in zip(
                internal.index_spark_columns, internal.index_spark_column_names
            )
        ]
        + internal.data_spark_columns
    )
    return first_series(DataFrame(internal.with_new_sdf(sdf)))
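add_suffix mirrors add_prefix with the argument order of F.concat flipped, so the two compose naturally. A short usage sketch, assuming the Koalas API:

import databricks.koalas as ks

s = ks.Series([1, 2])
print(s.add_prefix("item_").add_suffix("_v1"))
# item_0_v1    1
# item_1_v1    2
# Name: 0, dtype: int64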