本文整理汇总了 Python 中 featuretools.computational_backends.PandasBackend.calculate_all_features 方法的典型用法代码示例。如果您正苦于以下问题：Python PandasBackend.calculate_all_features 方法的具体用法？Python PandasBackend.calculate_all_features 怎么用？Python PandasBackend.calculate_all_features 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 featuretools.computational_backends.PandasBackend 的用法示例。
在下文中一共展示了PandasBackend.calculate_all_features方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: check
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def check(feature):
    """Assert that instance 2 gets the same values alone as in a batch.

    The feature is calculated once for instance ids 0, 1 and 2 and once for
    instance id 2 only; the row labelled 2 must be equal in both results.
    ``es`` is not a parameter here and is resolved from the enclosing scope.
    """
    backend = PandasBackend(es, [feature])
    batch_df = backend.calculate_all_features(instance_ids=[0, 1, 2],
                                              time_last=None)
    single_df = backend.calculate_all_features(instance_ids=[2],
                                               time_last=None)
    # The value for instance id 2 must not depend on the other requested ids.
    row_matches = single_df.loc[2] == batch_df.loc[2]
    assert row_matches.all()
示例2: test_arithmetic_of_direct
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_arithmetic_of_direct(es):
    """Check Add/Subtract/Multiply/Divide on two direct features of ``log``.

    Both operands are DirectFeatures whose child entity is ``log``: the
    product rating, and the customer age routed through ``sessions``.
    """
    log_rating = DirectFeature(es['products']['rating'],
                               child_entity=es['log'])
    session_age = DirectFeature(es['customers']['age'],
                                child_entity=es['sessions'])
    log_age = DirectFeature(session_age, child_entity=es['log'])

    # (primitive, expected values for log instances 0, 3, 5, 7)
    cases = [(Add, [38, 37, 37.5, 37.5]),
             (Subtract, [28, 29, 28.5, 28.5]),
             (Multiply, [165, 132, 148.5, 148.5]),
             (Divide, [6.6, 8.25, 22. / 3, 22. / 3])]
    features = [primitive(log_age, log_rating) for primitive, _ in cases]

    backend = PandasBackend(es, features)
    df = backend.calculate_all_features(instance_ids=[0, 3, 5, 7],
                                        time_last=None)

    for feature, (_, expected) in zip(features, cases):
        actual = df[feature.get_name()].values.tolist()
        assert actual == expected
示例3: test_arithmetic_of_val
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_arithmetic_of_val(es):
    """Check arithmetic primitives between ``log.value`` and the scalar 2.

    Each primitive is built twice, as ``value op 2`` and ``2 op value``.
    A final ``Divide(value, 0)`` feature is checked separately because its
    first expected entry is NaN, which never compares equal to itself.
    """
    # (primitive, expected for value op 2, expected for 2 op value[, extra])
    cases = [(Add, [2.0, 7.0, 12.0, 17.0], [2.0, 7.0, 12.0, 17.0]),
             (Subtract, [-2.0, 3.0, 8.0, 13.0], [2.0, -3.0, -8.0, -13.0]),
             (Multiply, [0, 10, 20, 30], [0, 10, 20, 30]),
             (Divide, [0, 2.5, 5, 7.5], [np.inf, 0.4, 0.2, 2 / 15.0],
              [np.nan, np.inf, np.inf, np.inf])]

    log_entity = es['log']
    features = []
    for case in cases:
        primitive = case[0]
        features.append(primitive(log_entity['value'], 2))
        features.append(primitive(2, log_entity['value']))
    features.append(Divide(log_entity['value'], 0))

    backend = PandasBackend(es, features)
    df = backend.calculate_all_features(instance_ids=[0, 1, 2, 3],
                                        time_last=None)

    for idx, case in enumerate(cases):
        feature_op_val = features[2 * idx]
        actual = df[feature_op_val.get_name()].values.tolist()
        assert actual == case[1]
        val_op_feature = features[2 * idx + 1]
        actual = df[val_op_feature.get_name()].values.tolist()
        assert actual == case[2]

    # Division by zero: the last element of the last case holds the
    # expectation for the trailing Divide(value, 0) feature.
    zero_div_expected = cases[-1][-1]
    zero_div_actual = df[features[-1].get_name()].values.tolist()
    assert np.isnan(zero_div_actual[0])
    assert zero_div_actual[1:] == zero_div_expected[1:]
示例4: test_cum_sum_group_on_nan
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_cum_sum_group_on_nan(es):
    """Check CumSum of ``log.value`` grouped by a column that contains NaN.

    ``product_id`` on the log dataframe is overwritten so that rows 11-14
    have a NaN group key; the expected cumulative sum for those rows is NaN.
    """
    log_value_feat = es['log']['value']
    product_ids = (['coke zero'] * 3 + ['car'] * 2 +
                   ['toothpaste'] * 3 + ['brown bag'] * 2 +
                   ['shoes'] +
                   [np.nan] * 4 +
                   ['coke_zero'] * 2)
    es['log'].df['product_id'] = product_ids

    cum_sum = CumSum(log_value_feat, es['log']['product_id'])
    backend = PandasBackend(es, [cum_sum])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)
    cvalues = df[cum_sum.get_name()].values
    assert len(cvalues) == 15

    # One row of expectations per product_id group, in row order.
    expected_values = [0, 5, 15,
                       15, 35,
                       0, 1, 3,
                       3, 3,
                       0,
                       np.nan, np.nan, np.nan, np.nan]
    for expected, actual in zip(expected_values, cvalues):
        if np.isnan(expected):
            assert np.isnan(actual)
        else:
            assert expected == actual
示例5: test_percentile_with_cutoff
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_percentile_with_cutoff(es):
    """Check Percentile of ``log.value`` for instance 2 at a fixed cutoff.

    With the cutoff time passed as ``time_last``, the percentile computed
    for instance id 2 is expected to be exactly 1.0.
    """
    value_feat = Feature(es['log']['value'])
    percentile = Percentile(value_feat)
    backend = PandasBackend(es, [percentile])
    cutoff = pd.Timestamp('2011/04/09 10:30:13')
    df = backend.calculate_all_features([2], cutoff)
    first_value = df[percentile.get_name()].tolist()[0]
    assert first_value == 1.0
示例6: test_cum_sum_use_previous_group_on_nan
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_cum_sum_use_previous_group_on_nan(es):
    """Check CumSum with a 40-second ``use_previous`` window and NaN groups.

    ``product_id`` on the log dataframe is overwritten so that rows 11-14
    have a NaN group key; the expected cumulative sum for those rows is NaN.
    """
    # TODO: Figure out how to test where `df`
    # in pd_rolling get_function() has multiindex
    log_value_feat = es['log']['value']
    product_ids = (['coke zero'] * 3 + ['car'] * 2 +
                   ['toothpaste'] * 3 + ['brown bag'] * 2 +
                   ['shoes'] +
                   [np.nan] * 4 +
                   ['coke_zero'] * 2)
    es['log'].df['product_id'] = product_ids

    cum_sum = CumSum(log_value_feat,
                     es['log']['product_id'],
                     es["log"]["datetime"],
                     use_previous=Timedelta(40, 'seconds'))
    backend = PandasBackend(es, [cum_sum])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)
    cvalues = df[cum_sum.get_name()].values
    assert len(cvalues) == 15

    # One row of expectations per product_id group, in row order.
    expected_values = [0, 5, 15,
                       15, 35,
                       0, 1, 3,
                       3, 0,
                       0,
                       np.nan, np.nan, np.nan, np.nan]
    for expected, actual in zip(expected_values, cvalues):
        if np.isnan(expected):
            assert np.isnan(actual)
        else:
            assert expected == actual
示例7: test_diff_single_value
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_diff_single_value(es):
    """Check that Diff over a single requested instance yields no value.

    Only store 5 is requested; the result must have one row and the Diff
    column must contain nothing but nulls.
    """
    stores = es['stores']
    diff = Diff(stores['num_square_feet'], stores[u'région_id'])
    backend = PandasBackend(es, [diff])
    df = backend.calculate_all_features(instance_ids=[5],
                                        time_last=None)
    n_rows = df.shape[0]
    assert n_rows == 1
    non_null = df[diff.get_name()].dropna()
    assert non_null.shape[0] == 0
示例8: test_diff
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_diff(es):
    """Check Diff of ``log.value`` grouped by session and by customer.

    ``diff1`` groups on the log's own ``session_id`` variable; ``diff2``
    groups on the session's ``customer_id`` brought down to ``log`` as a
    DirectFeature. NaN entries are compared with ``np.isnan`` because NaN
    never equals itself.
    """
    value = IdentityFeature(es['log']['value'])
    customer_id_feat = DirectFeature(es['sessions']['customer_id'],
                                     child_entity=es['log'])
    diff1 = Diff(value, es['log']['session_id'])
    diff2 = Diff(value, customer_id_feat)

    backend = PandasBackend(es, [diff1, diff2])
    df = backend.calculate_all_features(instance_ids=range(15),
                                        time_last=None)
    val1 = df[diff1.get_name()].values.tolist()
    val2 = df[diff2.get_name()].values.tolist()

    correct_vals1 = [
        np.nan, 5, 5, 5, 5, np.nan, 1, 1, 1, np.nan, np.nan, 5, np.nan, 7, 7
    ]
    correct_vals2 = [np.nan, 5, 5, 5, 5, -20, 1, 1, 1, -3, np.nan, 5, -5, 7, 7]

    def assert_same(actual, expected):
        # A NaN result is only acceptable where NaN is expected.
        if np.isnan(actual):
            assert np.isnan(expected)
        else:
            assert actual == expected

    for idx, actual1 in enumerate(val1):
        assert_same(actual1, correct_vals1[idx])
        assert_same(val2[idx], correct_vals2[idx])
示例9: test_override_cmp
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_override_cmp(es):
    """Check features built with overridden comparison operators.

    ``count`` (Count of log ids per session) is compared against a constant
    and against ``_sum`` (Sum of log values per session) with ``>``, ``>=``,
    ``<``, ``<=`` and ``!=``. Every resulting feature is calculated for
    sessions 0, 1 and 2 and checked against its expected boolean values.

    The original expectation table had 8 rows for 10 features, so the two
    ``!=`` features were calculated but never asserted. Their rows are added
    here; they follow from the other rows (count is > 1 for sessions 0 and 1
    and equals 1 for session 2; count is strictly below or above ``_sum``
    for every session).
    """
    count = Count(es['log']['id'], es['sessions'])
    _sum = Sum(es['log']['value'], es['sessions'])
    gt_lo = count > 1
    gt_other = count > _sum
    ge_lo = count >= 1
    ge_other = count >= _sum
    lt_hi = count < 10
    lt_other = count < _sum
    le_hi = count <= 10
    le_other = count <= _sum
    ne_lo = count != 1
    ne_other = count != _sum

    features = [gt_lo, gt_other, ge_lo, ge_other, lt_hi,
                lt_other, le_hi, le_other, ne_lo, ne_other]
    to_test = [[True, True, False],    # count > 1
               [False, False, True],   # count > _sum
               [True, True, True],     # count >= 1
               [False, False, True],   # count >= _sum
               [True, True, True],     # count < 10
               [True, True, False],    # count < _sum
               [True, True, True],     # count <= 10
               [True, True, False],    # count <= _sum
               [True, True, False],    # count != 1
               [True, True, True]]     # count != _sum
    # Guard against the table and the feature list drifting apart again,
    # which would silently leave trailing features unchecked.
    assert len(to_test) == len(features)

    pandas_backend = PandasBackend(es, features)
    df = pandas_backend.calculate_all_features(instance_ids=[0, 1, 2],
                                               time_last=None)
    for feature, expected in zip(features, to_test):
        v = df[feature.get_name()].values.tolist()
        assert v == expected
示例10: test_isin_feat_other_syntax_int
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_isin_feat_other_syntax_int(es):
    """Check ``Feature(...).isin`` with integers on ``log.value``.

    For the first eight log instances, only those whose value is 5 or 10
    are expected to be flagged True.
    """
    isin = Feature(es['log']['value']).isin([5, 10])
    backend = PandasBackend(es, [isin])
    df = backend.calculate_all_features(range(8), None)
    expected = [False, True, True, False, False, False, False, False]
    actual = df[isin.get_name()].values.tolist()
    assert expected == actual
示例11: test_make_trans_feat
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_make_trans_feat(es):
    """Check that Hour of ``log.datetime`` is 10 for log instance 0."""
    hour = Hour(es['log']['datetime'])
    backend = PandasBackend(es, [hour])
    df = backend.calculate_all_features(instance_ids=[0],
                                        time_last=None)
    hour_column = df[hour.get_name()]
    assert hour_column[0] == 10
示例12: test_isin_feat_other_syntax
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_isin_feat_other_syntax(es):
    """Check ``Feature(...).isin`` with strings on ``log.product_id``.

    For the first eight log instances, those whose product id is
    "toothpaste" or "coke zero" are expected to be flagged True.
    """
    isin = Feature(es['log']['product_id']).isin(["toothpaste", "coke zero"])
    backend = PandasBackend(es, [isin])
    df = backend.calculate_all_features(range(8), None)
    expected = [True, True, True, False, False, True, True, True]
    actual = df[isin.get_name()].values.tolist()
    assert expected == actual
示例13: test_direct_from_variable
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_direct_from_variable(es):
    """Check a DirectFeature built straight from a variable.

    The session's ``device_type`` is brought down to ``log``; log instances
    0 and 5 are expected to yield 0 and 1 respectively.
    """
    # should be same behavior as test_direct_from_identity
    device_type = es['sessions']['device_type']
    direct = DirectFeature(base_feature=device_type,
                           child_entity=es['log'])
    backend = PandasBackend(es, [direct])
    df = backend.calculate_all_features(instance_ids=[0, 5],
                                        time_last=None)
    actual = df[direct.get_name()].tolist()
    assert actual == [0, 1]
示例14: test_percentile
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_percentile(es):
    """Check Percentile of ``log.value`` against pandas' percentage rank.

    The reference is ``rank(pct=True)`` over the log dataframe's value
    column, restricted to instance ids 10-16. Two nulls count as a match.
    """
    value_feat = Feature(es['log']['value'])
    percentile = Percentile(value_feat)
    backend = PandasBackend(es, [percentile])
    df = backend.calculate_all_features(range(10, 17), None)

    ranked = es['log'].df[value_feat.get_name()].rank(pct=True)
    expected = ranked.loc[range(10, 17)]
    actual_values = df[percentile.get_name()].values
    for want, got in zip(expected.values, actual_values):
        both_null = pd.isnull(want) and pd.isnull(got)
        assert both_null or want == got
示例15: test_isin_feat_custom
# 需要导入模块: from featuretools.computational_backends import PandasBackend [as 别名]
# 或者: from featuretools.computational_backends.PandasBackend import calculate_all_features [as 别名]
def test_isin_feat_custom(es):
    """Check a custom "is_in" primitive made with ``make_trans_primitive``.

    The custom primitive is verified first on ``log.product_id``; the same
    expectations are then checked through ``Feature(...).isin`` for both a
    string column and an integer column, over the first eight log instances.
    """
    def pd_is_in(array, list_of_outputs=None):
        # A missing list is treated as an empty list.
        allowed = [] if list_of_outputs is None else list_of_outputs
        return pd.Series(array).isin(allowed)

    def isin_generate_name(self):
        base_name = self.base_features[0].get_name()
        outputs = str(self.kwargs['list_of_outputs'])
        return u"%s.isin(%s)" % (base_name, outputs)

    IsIn = make_trans_primitive(
        pd_is_in,
        [Variable],
        Boolean,
        name="is_in",
        description="For each value of the base feature, checks whether it is "
        "in a list that is provided.",
        cls_attributes={"generate_name": isin_generate_name})

    def assert_feature_values(feature, expected):
        # Calculate one feature for log instances 0-7 and compare the column.
        backend = PandasBackend(es, [feature])
        df = backend.calculate_all_features(range(8), None)
        actual = df[feature.get_name()].values.tolist()
        assert expected == actual

    custom_isin = IsIn(es['log']['product_id'],
                       list_of_outputs=["toothpaste", "coke zero"])
    assert_feature_values(
        custom_isin, [True, True, True, False, False, True, True, True])

    string_isin = Feature(es['log']['product_id']).isin(
        ["toothpaste", "coke zero"])
    assert_feature_values(
        string_isin, [True, True, True, False, False, True, True, True])

    int_isin = Feature(es['log']['value']).isin([5, 10])
    assert_feature_values(
        int_isin, [False, True, True, False, False, False, False, False])