本文整理匯總了Python中pandas._libs.hashtable.Factorizer方法的典型用法代碼示例。如果您正苦於以下問題:Python hashtable.Factorizer方法的具體用法?Python hashtable.Factorizer怎麼用?Python hashtable.Factorizer使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pandas._libs.hashtable的用法示例。
在下文中一共展示了hashtable.Factorizer方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: _factorize_keys
# 需要導入模塊: from pandas._libs import hashtable [as 別名]
# 或者: from pandas._libs.hashtable import Factorizer [as 別名]
def _factorize_keys(lk, rk, sort=True):
    """
    Encode the left and right keys as integer labels from one factorizer.

    Both keys are run through the same factorizer instance (left first,
    then right), so the two label arrays share a single label space.

    Parameters
    ----------
    lk, rk : array-like
        Left and right key values.
    sort : bool, default True
        When true, the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques.

    Returns
    -------
    tuple
        ``(left_labels, right_labels, count)``. ``count`` is the value
        reported by the factorizer, incremented by one when either side
        contained a ``-1`` label; those ``-1`` labels are overwritten in
        place with the pre-increment count.
    """
    both_tz_aware = is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk)
    if both_tz_aware:
        lk, rk = lk.values, rk.values

    same_categorical = (is_categorical_dtype(lk)
                        and is_categorical_dtype(rk)
                        and lk.is_dtype_equal(rk))

    if same_categorical:
        # Matching categorical dtypes: factorize on the integer codes.
        factorizer_cls = libhashtable.Int64Factorizer
        if lk.categories.equals(rk.categories):
            right_codes = rk.codes
        else:
            # Same categories listed in a different order: express the
            # right-hand codes in terms of the left-hand categories.
            right_codes = _recode_for_categories(
                rk.codes, rk.categories, lk.categories)
        left_values = _ensure_int64(lk.codes)
        right_values = _ensure_int64(right_codes)
    elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
        factorizer_cls = libhashtable.Int64Factorizer
        left_values = _ensure_int64(com._values_from_object(lk))
        right_values = _ensure_int64(com._values_from_object(rk))
    else:
        factorizer_cls = libhashtable.Factorizer
        left_values = _ensure_object(lk)
        right_values = _ensure_object(rk)

    size_hint = max(len(left_values), len(right_values))
    factorizer = factorizer_cls(size_hint)

    left_labels = factorizer.factorize(left_values)
    right_labels = factorizer.factorize(right_values)
    count = factorizer.get_count()

    if sort:
        left_labels, right_labels = _sort_labels(
            factorizer.uniques.to_array(), left_labels, right_labels)

    # Labels equal to -1 form the NA group.
    left_na = left_labels == -1
    right_na = right_labels == -1
    left_has_na = left_na.any()
    right_has_na = right_na.any()

    if not (left_has_na or right_has_na):
        return left_labels, right_labels, count

    # Give the NA group its own label, one past the existing ones.
    if left_has_na:
        np.putmask(left_labels, left_na, count)
    if right_has_na:
        np.putmask(right_labels, right_na, count)

    return left_labels, right_labels, count + 1
示例2: _factorize_keys
# 需要導入模塊: from pandas._libs import hashtable [as 別名]
# 或者: from pandas._libs.hashtable import Factorizer [as 別名]
def _factorize_keys(lk, rk, sort=True):
    """
    Encode the left and right keys as integer labels from one factorizer.

    Both keys are run through the same factorizer instance (left first,
    then right), so the two label arrays share a single label space.

    Parameters
    ----------
    lk, rk : array-like
        Left and right key values.
    sort : bool, default True
        When true, the labels are passed through ``_sort_labels`` together
        with the factorizer's uniques.

    Returns
    -------
    tuple
        ``(llab, rlab, count)``. ``count`` is the value reported by the
        factorizer, incremented by one when either side contained a ``-1``
        label; those ``-1`` labels are overwritten in place with the
        pre-increment count.
    """
    if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
        lk = lk.values
        rk = rk.values

    # if we exactly match in categories, allow us to factorize on codes
    if (is_categorical_dtype(lk) and
            is_categorical_dtype(rk) and
            lk.is_dtype_equal(rk)):
        klass = libhashtable.Int64Factorizer

        if lk.categories.equals(rk.categories):
            rk = rk.codes
        else:
            # Codes are positions into each side's own ``categories``, so
            # they are only comparable when both sides list the categories
            # in the same order.  Otherwise translate the right-hand codes
            # into positions within the left-hand categories.
            # NOTE(review): whether is_dtype_equal can be true here depends
            # on the pandas version -- confirm against the target release.
            indexer = lk.categories.get_indexer(rk.categories)
            codes = np.asarray(rk.codes)
            recoded = np.full(codes.shape, -1, dtype=np.int64)
            valid = codes != -1  # -1 codes stay -1
            recoded[valid] = indexer[codes[valid]]
            rk = recoded

        lk = _ensure_int64(lk.codes)
        rk = _ensure_int64(rk)
    elif is_int_or_datetime_dtype(lk) and is_int_or_datetime_dtype(rk):
        klass = libhashtable.Int64Factorizer
        lk = _ensure_int64(com._values_from_object(lk))
        rk = _ensure_int64(com._values_from_object(rk))
    else:
        klass = libhashtable.Factorizer
        lk = _ensure_object(lk)
        rk = _ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)

    count = rizer.get_count()

    if sort:
        uniques = rizer.uniques.to_array()
        llab, rlab = _sort_labels(uniques, llab, rlab)

    # NA group: labels equal to -1 get their own label, one past the rest
    lmask = llab == -1
    lany = lmask.any()
    rmask = rlab == -1
    rany = rmask.any()

    if lany or rany:
        if lany:
            np.putmask(llab, lmask, count)
        if rany:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count