本文整理汇总了 Python 中 pandas.core.frame.DataFrame.from_dict 方法的典型用法代码示例。如果您正苦于以下问题：Python DataFrame.from_dict 方法的具体用法？Python DataFrame.from_dict 怎么用？Python DataFrame.from_dict 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pandas.core.frame.DataFrame 的用法示例。
在下文中一共展示了 DataFrame.from_dict 方法的 6 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_categorical_order
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def test_categorical_order(self, file):
    """Compare a hand-built categorical frame against a parsed Stata file.

    ``file`` names an attribute on ``self``; that attribute's value is what
    gets handed to ``read_stata``. The expected frame is assembled column by
    column from explicit codes, then both the frame contents and the
    per-column codes/categories are compared with the parsed result.
    """
    letters = ['a', 'b', 'c', 'd', 'e']
    # Directly construct using expected codes.
    # Each entry is (is_cat, col_name, labels in order, underlying data).
    column_specs = [
        (True, 'ordered', letters, np.arange(5)),
        (True, 'reverse', letters, np.arange(5)[::-1]),
        (True, 'noorder', letters, np.array([2, 1, 4, 0, 3])),
        (True, 'floating', letters, np.arange(0, 5)),
        (True, 'float_missing', ['a', 'd', 'e'],
         np.array([0, 1, 2, -1, -1])),
        (False, 'nolabel', [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
        (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5)),
    ]

    columns = OrderedDict()
    for is_cat, name, labels, codes in column_specs:
        if is_cat:
            columns[name] = pd.Categorical.from_codes(codes, labels)
        else:
            # Non-categorical columns carry their labels as float32 data.
            columns[name] = pd.Series(labels, dtype=np.float32)
    expected = DataFrame.from_dict(columns)

    # Parse the file and make sure the overall frame matches first.
    parsed = read_stata(getattr(self, file))
    tm.assert_frame_equal(expected, parsed, check_categorical=False)

    # Then check identity of codes and categories, column by column.
    for name in expected:
        if not is_categorical_dtype(expected[name]):
            continue
        tm.assert_series_equal(expected[name].cat.codes,
                               parsed[name].cat.codes)
        tm.assert_index_equal(expected[name].cat.categories,
                              parsed[name].cat.categories)
示例2: _do_convert_categoricals
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def _do_convert_categoricals(self, data, value_label_dict, lbllist,
order_categoricals):
"""
Converts categorical columns to Categorical type.
"""
value_labels = list(compat.iterkeys(value_label_dict))
cat_converted_data = []
for col, label in zip(data, lbllist):
if label in value_labels:
# Explicit call with ordered=True
cat_data = Categorical(data[col], ordered=order_categoricals)
categories = []
for category in cat_data.categories:
if category in value_label_dict[label]:
categories.append(value_label_dict[label][category])
else:
categories.append(category) # Partially labeled
try:
cat_data.categories = categories
except ValueError:
vc = Series(categories).value_counts()
repeats = list(vc.index[vc > 1])
repeats = '\n' + '-' * 80 + '\n'.join(repeats)
raise ValueError('Value labels for column {col} are not '
'unique. The repeated labels are:\n'
'{repeats}'
.format(col=col, repeats=repeats))
# TODO: is the next line needed above in the data(...) method?
cat_data = Series(cat_data, index=data.index)
cat_converted_data.append((col, cat_data))
else:
cat_converted_data.append((col, data[col]))
data = DataFrame.from_dict(OrderedDict(cat_converted_data))
return data
示例3: _prepare_categoricals
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def _prepare_categoricals(self, data):
"""Check for categorical columns, retain categorical information for
Stata file and convert categorical data to int"""
is_cat = [is_categorical_dtype(data[col]) for col in data]
self._is_col_cat = is_cat
self._value_labels = []
if not any(is_cat):
return data
get_base_missing_value = StataMissingValue.get_base_missing_value
data_formatted = []
for col, col_is_cat in zip(data, is_cat):
if col_is_cat:
self._value_labels.append(StataValueLabel(data[col]))
dtype = data[col].cat.codes.dtype
if dtype == np.int64:
raise ValueError('It is not possible to export '
'int64-based categorical data to Stata.')
values = data[col].cat.codes.values.copy()
# Upcast if needed so that correct missing values can be set
if values.max() >= get_base_missing_value(dtype):
if dtype == np.int8:
dtype = np.int16
elif dtype == np.int16:
dtype = np.int32
else:
dtype = np.float64
values = np.array(values, dtype=dtype)
# Replace missing values with Stata missing value for type
values[values == -1] = get_base_missing_value(dtype)
data_formatted.append((col, values))
else:
data_formatted.append((col, data[col]))
return DataFrame.from_dict(OrderedDict(data_formatted))
示例4: _do_convert_categoricals
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def _do_convert_categoricals(self, data, value_label_dict, lbllist,
order_categoricals):
"""
Converts categorical columns to Categorical type.
"""
value_labels = list(compat.iterkeys(value_label_dict))
cat_converted_data = []
for col, label in zip(data, lbllist):
if label in value_labels:
# Explicit call with ordered=True
cat_data = Categorical(data[col], ordered=order_categoricals)
categories = []
for category in cat_data.categories:
if category in value_label_dict[label]:
categories.append(value_label_dict[label][category])
else:
categories.append(category) # Partially labeled
try:
cat_data.categories = categories
except ValueError:
vc = Series(categories).value_counts()
repeats = list(vc.index[vc > 1])
repeats = '\n' + '-' * 80 + '\n'.join(repeats)
msg = 'Value labels for column {0} are not unique. The ' \
'repeated labels are:\n{1}'.format(col, repeats)
raise ValueError(msg)
# TODO: is the next line needed above in the data(...) method?
cat_data = Series(cat_data, index=data.index)
cat_converted_data.append((col, cat_data))
else:
cat_converted_data.append((col, data[col]))
data = DataFrame.from_dict(OrderedDict(cat_converted_data))
return data
示例5: describe
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def describe(self):
    """
    Returns a dataframe with frequency and counts by level.

    The result has ``counts`` and ``freqs`` columns and is indexed by
    ``levels``; ``freqs`` is each count divided by the sum of all counts.
    """
    # Hack? (function-scope import, kept as in the original)
    from pandas.core.frame import DataFrame

    # Group the labels by their own value and count each group.
    per_label = DataFrame(self.labels).groupby(0).count()
    counts = per_label.values.squeeze()
    total = float(counts.sum())

    summary = DataFrame.from_dict({
        'counts': counts,
        'freqs': counts / total,
        'levels': self.levels
    })
    return summary.set_index('levels')
示例6: rpy2py_dataframe
# 需要导入模块: from pandas.core.frame import DataFrame [as 别名]
# 或者: from pandas.core.frame.DataFrame import from_dict [as 别名]
def rpy2py_dataframe(obj):
    """Build a frame from ``obj``'s items, indexed by ``obj.rownames``.

    Every value that is a ``Sexp`` instance is first passed through
    ``rpy2py``; other values are used as they are. Item order is kept.
    """
    # NOTE(review): PandasDataFrame is presumably pandas.DataFrame under an
    # alias - confirm against the file's imports.
    converted = OrderedDict()
    for name, column in obj.items():
        if isinstance(column, Sexp):
            column = rpy2py(column)
        converted[name] = column

    frame = PandasDataFrame.from_dict(converted)
    frame.index = obj.rownames
    return frame