本文整理汇总了Python中pyprepbuddy.rdds.transformable_rdd.TransformableRDD.normalize方法的典型用法代码示例。如果您正苦于以下问题:Python TransformableRDD.normalize方法的具体用法?Python TransformableRDD.normalize怎么用?Python TransformableRDD.normalize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyprepbuddy.rdds.transformable_rdd.TransformableRDD的用法示例。
在下文中一共展示了TransformableRDD.normalize方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_should_normalize_by_Decimal_Scale
# 需要导入模块: from pyprepbuddy.rdds.transformable_rdd import TransformableRDD [as 别名]
# 或者: from pyprepbuddy.rdds.transformable_rdd.TransformableRDD import normalize [as 别名]
def test_should_normalize_by_Decimal_Scale(self):
    """Check normalization of column 3 with ``DecimalScalingNormalizer``.

    Builds a five-row CSV call-record dataset, normalizes the column at
    index 3 (values 211, 0, 45, 45, 12) and asserts that every expected
    string is present in the collected result. The expected strings are
    the original values divided by 100.
    """
    initial_dataset = self.sc.parallelize([
        "07434677419,07371326239,Incoming,211,Wed Sep 15 19:17:44 +0100 2010",
        "07641036117,01666472054,Outgoing,0,Mon Feb 11 07:18:23 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07681546436,Missed,12,Mon Feb 11 08:04:42 +0000 1980",
    ])
    transformable_rdd = TransformableRDD(initial_dataset, 'csv')
    final_rdd = transformable_rdd.normalize(3, DecimalScalingNormalizer())
    normalized_durations = final_rdd.select(3).collect()

    # One expected value per input row, in input order. Rows 3 and 4 are
    # identical, so the membership check only proves that "0.45" occurs at
    # least once; it does not verify ordering or multiplicity.
    expected_durations = ("2.11", "0.0", "0.45", "0.45", "0.12")
    for expected in expected_durations:
        # assertIn reports the missing value and the actual container on
        # failure, unlike assertTrue(container.__contains__(value)).
        self.assertIn(expected, normalized_durations)
示例2: test_should_normalize_by_Min_Max_normalization
# 需要导入模块: from pyprepbuddy.rdds.transformable_rdd import TransformableRDD [as 别名]
# 或者: from pyprepbuddy.rdds.transformable_rdd.TransformableRDD import normalize [as 别名]
def test_should_normalize_by_Min_Max_normalization(self):
    """Check normalization of column 3 with ``MinMaxNormalizer(0, 1)``.

    Builds a five-row CSV call-record dataset, normalizes the column at
    index 3 (values 211, 0, 45, 45, 12) and asserts that every expected
    string is present in the collected result. The expected strings match
    ``value / 211``, i.e. the column minimum (0) maps to "0.0" and the
    column maximum (211) maps to "1.0".
    """
    initial_dataset = self.sc.parallelize([
        "07434677419,07371326239,Incoming,211,Wed Sep 15 19:17:44 +0100 2010",
        "07641036117,01666472054,Outgoing,0,Mon Feb 11 07:18:23 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07681546436,Missed,12,Mon Feb 11 08:04:42 +0000 1980",
    ])
    transformable_rdd = TransformableRDD(initial_dataset, 'csv')
    final_rdd = transformable_rdd.normalize(3, MinMaxNormalizer(0, 1))
    normalized_durations = final_rdd.select(3).collect()

    # One expected value per input row, in input order. Rows 3 and 4 are
    # identical, so the membership check only proves that the shared value
    # occurs at least once; it does not verify ordering or multiplicity.
    expected_durations = (
        "1.0",
        "0.0",
        "0.2132701421800948",
        "0.2132701421800948",
        "0.05687203791469194",
    )
    for expected in expected_durations:
        # assertIn reports the missing value and the actual container on
        # failure, unlike assertTrue(container.__contains__(value)).
        self.assertIn(expected, normalized_durations)
示例3: test_should_normalize_by_Z_Score_normalization
# 需要导入模块: from pyprepbuddy.rdds.transformable_rdd import TransformableRDD [as 别名]
# 或者: from pyprepbuddy.rdds.transformable_rdd.TransformableRDD import normalize [as 别名]
def test_should_normalize_by_Z_Score_normalization(self):
    """Check normalization of column 3 with ``ZScoreNormalizer``.

    Builds a five-row CSV call-record dataset, normalizes the column at
    index 3 (values 211, 0, 45, 45, 12) and asserts that every expected
    string is present in the collected result. The expected strings are
    consistent with ``(value - mean) / std`` where ``std`` is the
    population standard deviation of the column (mean 62.6).
    """
    initial_dataset = self.sc.parallelize([
        "07434677419,07371326239,Incoming,211,Wed Sep 15 19:17:44 +0100 2010",
        "07641036117,01666472054,Outgoing,0,Mon Feb 11 07:18:23 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07371326239,Incoming,45,Mon Feb 11 07:45:42 +0000 1980",
        "07641036117,07681546436,Missed,12,Mon Feb 11 08:04:42 +0000 1980",
    ])
    transformable_rdd = TransformableRDD(initial_dataset, 'csv')
    final_rdd = transformable_rdd.normalize(3, ZScoreNormalizer())
    normalized_durations = final_rdd.select(3).collect()

    # One expected value per input row, in input order. Rows 3 and 4 are
    # identical, so the membership check only proves that the shared value
    # occurs at least once; it does not verify ordering or multiplicity.
    expected_durations = (
        "1.944528306701421",
        "-0.8202659838241843",
        "-0.2306179123850742",
        "-0.2306179123850742",
        "-0.6630264981070882",
    )
    for expected in expected_durations:
        # assertIn reports the missing value and the actual container on
        # failure, unlike assertTrue(container.__contains__(value)).
        self.assertIn(expected, normalized_durations)