本文整理汇总了Python中pandas.testing.assert_series_equal方法的典型用法代码示例。如果您正苦于以下问题:Python testing.assert_series_equal方法的具体用法?Python testing.assert_series_equal怎么用?Python testing.assert_series_equal使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas.testing的用法示例。
在下文中一共展示了testing.assert_series_equal方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_clean
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_clean(self):
    """Check ``clean`` with its defaults and with every option disabled."""
    raw_names = pd.Series([
        'Mary-ann', 'Bob :)', 'Angel', 'Bob (alias Billy)', 'Mary ann',
        'John', np.nan
    ])
    cleaned_names = pd.Series(
        ['mary ann', 'bob', 'angel', 'bob', 'mary ann', 'john', np.nan])

    # Default options: the output must match the expected cleaned names.
    pdt.assert_series_equal(clean(raw_names), cleaned_names)

    # Every cleaning option switched off: the input must come back unchanged.
    all_disabled = {
        'lowercase': False,
        'replace_by_none': False,
        'replace_by_whitespace': False,
        'strip_accents': False,
        'remove_brackets': False,
    }
    untouched = clean(raw_names, **all_disabled)
    pdt.assert_series_equal(untouched, raw_names)
示例2: test_clean_parameters
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_clean_parameters(self):
    """Check ``clean`` when the cleaning options are passed explicitly."""
    raw_names = pd.Series([
        u'Mary-ann', u'Bob :)', u'Angel', u'Bob (alias Billy)',
        u'Mary ann', u'John', np.nan
    ])
    cleaned_names = pd.Series([
        u'mary ann', u'bob', u'angel', u'bob', u'mary ann', u'john', np.nan
    ])

    # The regex patterns are spelled out here rather than left to defaults.
    options = {
        'lowercase': True,
        'replace_by_none': r'[^ \-\_A-Za-z0-9]+',
        'replace_by_whitespace': r'[\-\_]',
        'remove_brackets': True,
    }
    outcome = clean(raw_names, **options)

    # The cleaned output has to match the expected series exactly.
    pdt.assert_series_equal(outcome, cleaned_names)
示例3: test_numeric
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_numeric(self):
    """Check the 'step' numeric comparison, configured in three call styles."""
    left = DataFrame({'col': [1, 1, 1, nan, 0]})
    right = DataFrame({'col': [1, 2, 3, nan, nan]})
    # Row i of the left frame is paired with row i of the right frame.
    pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

    comparer = recordlinkage.Compare()
    # The same comparison given with a keyword offset, a keyword method,
    # and fully positional arguments.
    comparer.numeric('col', 'col', 'step', offset=2)
    comparer.numeric('col', 'col', method='step', offset=2)
    comparer.numeric('col', 'col', 'step', 2)
    features = comparer.compute(pairs, left, right)

    # All three feature columns are expected to carry the same scores.
    scores = [1.0, 1.0, 1.0, 0.0, 0.0]
    for column in range(3):
        wanted = Series(scores, index=pairs, name=column)
        pdt.assert_series_equal(features[column], wanted)
示例4: test_dates
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_dates(self):
    """Check the date comparison with its default settings."""
    left_dates = to_datetime(
        ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
    left = DataFrame({'col': left_dates})

    right_dates = to_datetime([
        '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
        '2010/9/30'
    ])
    right = DataFrame({'col': right_dates})

    # Row i of the left frame is paired with row i of the right frame.
    pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

    comparer = recordlinkage.Compare()
    comparer.date('col', 'col')
    features = comparer.compute(pairs, left, right)
    outcome = features[0]

    wanted = Series([1, 0, 0, 0.5, 0.5], index=pairs, name=0)
    pdt.assert_series_equal(outcome, wanted)
示例5: test_geo
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_geo(self):
    """Check the 'step' geographic comparison with an offset of 50."""
    # Utrecht, Amsterdam, Rotterdam (cities in The Netherlands).
    left = DataFrame({
        'lat': [52.0842455, 52.3747388, 51.9280573],
        'lng': [5.0124516, 4.7585305, 4.4203581]
    })
    # The same three coordinates, rotated by one position.
    right = DataFrame({
        'lat': [52.3747388, 51.9280573, 52.0842455],
        'lng': [4.7585305, 4.4203581, 5.0124516]
    })
    pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

    comparer = recordlinkage.Compare()
    # The original comment calls the offset a "50 km range".
    comparer.geo('lat', 'lng', 'lat', 'lng', method='step', offset=50)
    features = comparer.compute(pairs, left, right)

    # Original note: "Missing values as default
    # [36.639460, 54.765854, 44.092472]" -- presumably the pairwise
    # distances in km; TODO confirm.
    wanted = Series([1.0, 0.0, 1.0], index=pairs, name=0)
    pdt.assert_series_equal(features[0], wanted)
示例6: test_defaults
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_defaults(self):
    """Check that the default string comparison equals 'levenshtein'."""
    left = DataFrame({
        'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
    })
    right = DataFrame({
        'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]
    })
    pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

    comparer = recordlinkage.Compare()
    # One feature without a method, one naming levenshtein explicitly.
    comparer.string('col', 'col', label='default')
    comparer.string('col', 'col', method='levenshtein', label='with_args')
    features = comparer.compute(pairs, left, right)

    # Drop the series names so only the values and index are compared.
    implicit = features['default'].rename(None)
    explicit = features['with_args'].rename(None)
    pdt.assert_series_equal(implicit, explicit)
示例7: test_variable_nan
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_variable_nan(self, missing_value):
    """Check that ``Variable`` fills missing entries with ``missing_value``.

    ``missing_value`` is supplied by the caller (presumably a pytest
    parametrisation or fixture -- not visible here).
    """
    # Local import, as in the original test.
    from recordlinkage.compare import Variable

    # Random input data; the last ten left-hand values are missing.
    left_values = np.random.random((100,))
    left_values[90:] = np.nan
    right_values = np.random.random((100,))

    left = DataFrame({'col': left_values})
    right = DataFrame({'col': right_values})
    pairs = MultiIndex.from_arrays([left.index.values, right.index.values])

    # The part under test.
    comparer = recordlinkage.Compare()
    comparer.add(Variable(left_on='col', missing_value=missing_value))
    outcome = comparer.compute(pairs, left, right)[0].rename(None)

    # Expect the left-hand values with the missing tail replaced.
    wanted = Series(left_values, index=pairs)
    wanted.iloc[90:] = missing_value
    pdt.assert_series_equal(outcome, wanted)
示例8: test_consolidate_parameter_attribute_standard_wildcards
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_consolidate_parameter_attribute_standard_wildcards():
    """Consolidate "attr" from two results without passing ``wildcards``."""
    full_index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 1), ("b", 1), ("b", 2)], names=["ind1", "ind2"]
    )

    # The first result covers only the first three index entries.
    partial = pd.DataFrame(
        {"attr": ["g1", "g2", "g3"], "other": [1, 2, 3]},
        index=full_index[:3],
    )
    # The second result covers all four entries.
    complete = pd.DataFrame(
        {"attr": ["g1", "g2", "g3", "g2"], "other2": [11, 22, 33, 44]},
        index=full_index,
    )

    # Both results share the same (empty) info dict.
    shared_info = {}
    results = [OPT_RES(partial, shared_info), OPT_RES(complete, shared_info)]

    consolidated = test_module._consolidate_parameter_attribute(
        results=results, attribute="attr"
    )

    wanted = pd.Series(
        ["g1", "g2", "g3", "g2"], index=full_index, name="attr"
    )
    pdt.assert_series_equal(consolidated, wanted)
示例9: test_consolidate_parameter_attribute_custom_wildcards
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_consolidate_parameter_attribute_custom_wildcards():
    """Consolidate "attr" when ``0`` and ``None`` are passed as wildcards."""
    full_index = pd.MultiIndex.from_tuples(
        [("a", 0), ("a", 1), ("b", 1), ("b", 2)], names=["ind1", "ind2"]
    )

    # The first result holds a None entry in "attr".
    partial = pd.DataFrame(
        {"attr": ["g1", None, "g3"], "other": [1, 2, 3]},
        index=full_index[:3],
    )
    # The second result holds a 0 entry in "attr".
    complete = pd.DataFrame(
        {"attr": ["g1", "g2", "g3", 0], "other2": [11, 22, 33, 44]},
        index=full_index,
    )

    # Both results share the same (empty) info dict.
    shared_info = {}
    results = [OPT_RES(partial, shared_info), OPT_RES(complete, shared_info)]

    consolidated = test_module._consolidate_parameter_attribute(
        results=results, attribute="attr", wildcards=[0, None]
    )

    # The entry that is only ever a wildcard is expected to come out as NaN.
    wanted = pd.Series(
        ["g1", "g2", "g3", np.nan], index=full_index, name="attr"
    )
    pdt.assert_series_equal(consolidated, wanted)
示例10: test_calculate_x_bounds_without_nan
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_calculate_x_bounds_without_nan():
    """Check per-group x bounds when values and intervals are all present."""
    frame = pd.DataFrame(
        {
            "group": ["a"] * 3 + ["b"] * 3,
            "value": [0, 1, 2, 3, 4, 5],
            "conf_int_lower": [-1, 0, -2, 2, -5, 4],
            "conf_int_upper": [1, 2, 3, 3, 5, 10],
        }
    )

    lower, upper = test_module._calculate_x_bounds(frame, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    wanted_lower = pd.Series([-2.0, -5.0], index=groups, name="x_min")
    wanted_upper = pd.Series([3.0, 10.0], index=groups, name="x_max")
    pdt.assert_series_equal(wanted_lower, lower)
    pdt.assert_series_equal(wanted_upper, upper)
示例11: test_calculate_x_bounds_with_nan
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_calculate_x_bounds_with_nan():
    """Check per-group x bounds when intervals and some values are NaN."""
    frame = pd.DataFrame(
        {
            "group": ["a"] * 3 + ["b"] * 3,
            "value": [0, 1, np.nan, 3, np.nan, 5],
            # Both confidence-interval columns are NaN throughout.
            "conf_int_lower": np.nan,
            "conf_int_upper": np.nan,
        }
    )

    lower, upper = test_module._calculate_x_bounds(frame, 0.0)

    groups = pd.Index(["a", "b"], name="group")
    wanted_lower = pd.Series([0.0, 3.0], index=groups, name="x_min")
    wanted_upper = pd.Series([1.0, 5.0], index=groups, name="x_max")
    pdt.assert_series_equal(wanted_lower, lower)
    pdt.assert_series_equal(wanted_upper, upper)
示例12: test_calculate_x_bounds_with_padding
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_calculate_x_bounds_with_padding():
    """Check per-group x bounds when a padding of 0.1 is requested."""
    frame = pd.DataFrame(
        {
            "group": ["a"] * 3 + ["b"] * 3,
            "value": [0, 1, np.nan, 3, np.nan, 5],
            # Both confidence-interval columns are NaN throughout.
            "conf_int_lower": np.nan,
            "conf_int_upper": np.nan,
        }
    )

    lower, upper = test_module._calculate_x_bounds(frame, 0.1)

    groups = pd.Index(["a", "b"], name="group")
    # Judging by these expectations, the bounds widen by padding times the
    # group's value range (0.1 for "a", 0.2 for "b").
    wanted_lower = pd.Series([-0.1, 2.8], index=groups, name="x_min")
    wanted_upper = pd.Series([1.1, 5.2], index=groups, name="x_max")
    pdt.assert_series_equal(wanted_lower, lower)
    pdt.assert_series_equal(wanted_upper, upper)
# replace_by_midpoint
# ====================
示例13: test_pandas_from_arrow
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_pandas_from_arrow():
    """Check ``fr.pandas_from_arrow`` for arrays, record batches and tables.

    Each input kind is converted in the default mode and with
    ``continuous=True``, then compared against a pandas object built
    directly from the matching Fletcher array class.
    """
    arr = pa.array(["a", "b", "c"], pa.string())

    # A plain Arrow array converts to an unnamed Series.
    expected_chunked_series = pd.Series(fr.FletcherChunkedArray(arr))
    pdt.assert_series_equal(expected_chunked_series, fr.pandas_from_arrow(arr))

    expected_continuous_series = pd.Series(fr.FletcherContinuousArray(arr))
    pdt.assert_series_equal(
        expected_continuous_series, fr.pandas_from_arrow(arr, continuous=True)
    )

    # Record batches and tables convert to DataFrames. Both inputs are built
    # once and reused for the chunked and the continuous checks.
    rb = pa.RecordBatch.from_arrays([arr], ["column"])
    table = pa.Table.from_arrays([arr], ["column"])

    expected_chunked_df = pd.DataFrame({"column": fr.FletcherChunkedArray(arr)})
    pdt.assert_frame_equal(expected_chunked_df, fr.pandas_from_arrow(rb))
    pdt.assert_frame_equal(expected_chunked_df, fr.pandas_from_arrow(table))

    expected_continuous_df = pd.DataFrame(
        {"column": fr.FletcherContinuousArray(arr)}
    )
    pdt.assert_frame_equal(
        expected_continuous_df, fr.pandas_from_arrow(rb, continuous=True)
    )
    pdt.assert_frame_equal(
        expected_continuous_df, fr.pandas_from_arrow(table, continuous=True)
    )
示例14: test_text_zfill
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_text_zfill(data, fletcher_variant):
    """Compare ``fr_text.zfill`` against pandas' ``str.zfill``.

    ``data`` is a sequence of optional strings and ``fletcher_variant``
    selects the Fletcher array class ("chunked" or anything else for the
    continuous one).
    """
    contains_nul = any("\x00" in item for item in data if item)
    if contains_nul:
        # pytest.skip("pandas cannot handle \\x00 characters in tests")
        # Skip is not working properly with hypothesis, so bail out quietly.
        return

    ser_pd = pd.Series(data, dtype=str)
    longest = ser_pd.map(_optional_len).max()
    # If the maximum comes back as NA, fall back to a length of zero.
    if pd.isna(longest):
        longest = 0

    arrow_data = pa.array(data, type=pa.string())
    array_cls = (
        fr.FletcherChunkedArray
        if fletcher_variant == "chunked"
        else fr.FletcherContinuousArray
    )
    ser_fr = pd.Series(array_cls(arrow_data))

    # Pad to one character more than the longest string.
    width = longest + 1
    result_pd = ser_pd.str.zfill(width)
    result_fr = ser_fr.fr_text.zfill(width).astype(object)
    # Pandas returns np.nan for NA values in cat, keep this in line
    # (comment carried over from the original; it refers to "cat").
    result_fr[result_fr.isna()] = np.nan

    tm.assert_series_equal(result_fr, result_pd)
示例15: test_dataframe_with_categoricals_ignored
# 需要导入模块: from pandas import testing [as 别名]
# 或者: from pandas.testing import assert_series_equal [as 别名]
def test_dataframe_with_categoricals_ignored(builder):
    """Round-trip a categorical column through ``builder`` with dtype checks off."""
    colours = pd.Categorical(
        ["red", "blue", "red"], categories=["blue", "red"], ordered=True
    )
    df_value = pd.DataFrame()
    df_value["cat"] = colours

    @builder
    @bn.protocol.frame(check_dtypes=False)
    def df():
        return df_value

    # Whether or not the deserialized column has the Categorical Dtype can
    # depend on the version of pyarrow being used, so both columns are
    # converted to the same type before comparing.
    rebuilt = builder.build().get("df")["cat"].astype(object)
    original = df_value["cat"].astype(object)
    pdt.assert_series_equal(rebuilt, original)