本文整理汇总了Python中pandas._libs.hashtable.Factorizer方法的典型用法代码示例。如果您正苦于以下问题:Python hashtable.Factorizer方法的具体用法?Python hashtable.Factorizer怎么用?Python hashtable.Factorizer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas._libs.hashtable
的用法示例。
在下文中一共展示了hashtable.Factorizer方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _factorize_keys
# 需要导入模块: from pandas._libs import hashtable [as 别名]
# 或者: from pandas._libs.hashtable import Factorizer [as 别名]
def _factorize_keys(lk, rk, sort=True):
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
lk = lk.values
rk = rk.values
# if we exactly match in categories, allow us to factorize on codes
if (is_categorical_dtype(lk) and
is_categorical_dtype(rk) and
lk.is_dtype_equal(rk)):
klass = libhashtable.Int64Factorizer
if lk.categories.equals(rk.categories):
rk = rk.codes
else:
# Same categories in different orders -> recode
rk = _recode_for_categories(rk.codes, rk.categories, lk.categories)
lk = _ensure_int64(lk.codes)
rk = _ensure_int64(rk)
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
klass = libhashtable.Int64Factorizer
lk = _ensure_int64(com._values_from_object(lk))
rk = _ensure_int64(com._values_from_object(rk))
else:
klass = libhashtable.Factorizer
lk = _ensure_object(lk)
rk = _ensure_object(rk)
rizer = klass(max(len(lk), len(rk)))
llab = rizer.factorize(lk)
rlab = rizer.factorize(rk)
count = rizer.get_count()
if sort:
uniques = rizer.uniques.to_array()
llab, rlab = _sort_labels(uniques, llab, rlab)
# NA group
lmask = llab == -1
lany = lmask.any()
rmask = rlab == -1
rany = rmask.any()
if lany or rany:
if lany:
np.putmask(llab, lmask, count)
if rany:
np.putmask(rlab, rmask, count)
count += 1
return llab, rlab, count
示例2: _factorize_keys
# 需要导入模块: from pandas._libs import hashtable [as 别名]
# 或者: from pandas._libs.hashtable import Factorizer [as 别名]
def _factorize_keys(lk, rk, sort=True):
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
lk = lk.values
rk = rk.values
# if we exactly match in categories, allow us to factorize on codes
if (is_categorical_dtype(lk) and
is_categorical_dtype(rk) and
lk.is_dtype_equal(rk)):
klass = libhashtable.Int64Factorizer
lk = _ensure_int64(lk.codes)
rk = _ensure_int64(rk.codes)
elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
klass = libhashtable.Int64Factorizer
lk = _ensure_int64(com._values_from_object(lk))
rk = _ensure_int64(com._values_from_object(rk))
else:
klass = libhashtable.Factorizer
lk = _ensure_object(lk)
rk = _ensure_object(rk)
rizer = klass(max(len(lk), len(rk)))
llab = rizer.factorize(lk)
rlab = rizer.factorize(rk)
count = rizer.get_count()
if sort:
uniques = rizer.uniques.to_array()
llab, rlab = _sort_labels(uniques, llab, rlab)
# NA group
lmask = llab == -1
lany = lmask.any()
rmask = rlab == -1
rany = rmask.any()
if lany or rany:
if lany:
np.putmask(llab, lmask, count)
if rany:
np.putmask(rlab, rmask, count)
count += 1
return llab, rlab, count