本文整理汇总了 Python 中 sklearn_pandas.DataFrameMapper 类的典型用法代码示例。如果您正苦于以下问题:Python DataFrameMapper 类的具体用法?Python DataFrameMapper 怎么用?Python DataFrameMapper 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 DataFrameMapper 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_nonexistent_columns_explicit_fail
def test_nonexistent_columns_explicit_fail(simple_dataframe):
    """
    Requesting a column that the dataframe does not contain must raise
    KeyError.
    """
    missing_columns = ["nonexistent_feature"]
    mapper = DataFrameMapper(None)
    with pytest.raises(KeyError):
        mapper._get_col_subset(simple_dataframe, missing_columns)
示例2: preprocess_train
def preprocess_train(train):
    """
    Build the training feature matrix and the three target series.

    The target columns 'count', 'casual' and 'registered' are read from
    ``train`` before ``preprocess_data`` is called on it. The mapper then
    selects a fixed, ordered list of columns: 'season' and 'weather' each
    get a LabelBinarizer, every other column is given ``None`` as its
    transformer.

    Returns the tuple (train_X, train_y, train_y1, train_y2, mapper).
    """
    count_target = train['count']
    casual_target = train['casual']
    registered_target = train['registered']

    preprocess_data(train)

    # Column order here determines the column order of the mapper output.
    column_order = (
        'hour', 'season', 'holiday', 'workingday', 'weather', 'temp',
        'atemp', 'humidity', 'windspeed', 'weekday', 'is_sunday',
        'bad_weather', 'year',
    )
    binarized = ('season', 'weather')
    feature_specs = [
        (column,
         preprocessing.LabelBinarizer() if column in binarized else None)
        for column in column_order
    ]

    mapper = DataFrameMapper(feature_specs)
    features = mapper.fit_transform(train)
    return features, count_target, casual_target, registered_target, mapper
示例3: scale_vars
def scale_vars(df, mapper):
    """
    Overwrite columns of ``df`` in place with the mapper's transformed
    values and return the mapper that was used.

    When ``mapper`` is None, a new DataFrameMapper is fitted on ``df`` with
    one StandardScaler per column for which ``is_numeric_dtype`` is true.
    Otherwise the supplied mapper is applied as-is.

    Side effect: installs a process-wide filter ignoring sklearn's
    DataConversionWarning.
    """
    warnings.filterwarnings(
        'ignore', category=sklearn.exceptions.DataConversionWarning)

    if mapper is None:
        numeric_columns = [
            name for name in df.columns if is_numeric_dtype(df[name])
        ]
        scaler_specs = [([name], StandardScaler()) for name in numeric_columns]
        mapper = DataFrameMapper(scaler_specs).fit(df)

    # Run the transform before reading transformed_names_, matching the
    # evaluation order of the original single-line assignment.
    scaled_values = mapper.transform(df)
    df[mapper.transformed_names_] = scaled_values
    return mapper
示例4: compute_cross_correlation_score
def compute_cross_correlation_score(df, clfs, preprocess_scaling=True, nFold=10):
    """
    Score every classifier in ``clfs`` on each stratified fold of ``df``.

    :param df: frame whose 'features' column is vectorised with a
        DictVectorizer and whose ``expected_class`` supplies the labels
    :param clfs: collection of classifiers, each passed to
        ``generate_score`` once per fold
    :param preprocess_scaling: forwarded to ``preprocess``
    :param nFold: number of folds given to StratifiedKFold
    :return: tuple ``(scores, classification_results)`` of two lists, each
        with one entry per (fold, classifier) pair
    """
    vectorizer = sklearn.feature_extraction.DictVectorizer()
    to_sklearn_features = DataFrameMapper([('features', vectorizer)])
    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    folds = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)

    scores, classification_results = [], []
    for fold_no, (train_index, test_index) in enumerate(folds):
        X_train = data_X[train_index]
        X_test = data_X[test_index]
        Y_train = data_Y[train_index]
        Y_test = data_Y[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))

        fold_data = Cross_validation_split(X_train, X_test, Y_train, Y_test)
        fold_data = preprocess(
            fold_data,
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=False,
        )

        for clf in clfs:
            fold_score, fold_classification = generate_score(
                clf, fold_data, fold=fold_no)
            scores.append(fold_score)
            classification_results.append(fold_classification)

    return scores, classification_results
示例5: test_list_transformers_single_arg
def test_list_transformers_single_arg(simple_dataframe):
    """
    A list of transformers is accepted even when some of them take only
    a single X argument rather than the pair (X, y).
    """
    single_arg_steps = [MockXTransformer()]
    mapper = DataFrameMapper([("a", single_arg_steps)])
    # The call is the whole check: it has to complete without raising.
    mapper.fit_transform(simple_dataframe)
示例6: test_simple_df
def test_simple_df(simple_dataframe):
    """
    With df_out=True the mapper returns a pandas DataFrame whose 'a'
    column has as many entries as the input's 'a' column.
    """
    mapper = DataFrameMapper([('a', None)], df_out=True)
    result = mapper.fit_transform(simple_dataframe)

    assert type(result) == pd.DataFrame

    produced_rows = len(result["a"])
    expected_rows = len(simple_dataframe["a"])
    assert produced_rows == expected_rows
示例7: test_transformed_names_complex_alias
def test_transformed_names_complex_alias(complex_dataframe):
    """
    An alias given for a column that expands into several outputs is used
    as the prefix of the output names.
    """
    alias_options = {'alias': 'new'}
    mapper = DataFrameMapper([('target', LabelBinarizer(), alias_options)])
    mapper.fit_transform(complex_dataframe)

    expected_names = ['new_a', 'new_b', 'new_c']
    assert mapper.transformed_names_ == expected_names
示例8: test_get_col_subset_single_column_array
def test_get_col_subset_single_column_array(simple_dataframe):
    """
    Asking for one column by name yields a 1-dimensional numpy array.
    """
    mapper = DataFrameMapper(None)
    column = mapper._get_col_subset(simple_dataframe, "a")

    assert type(column) == np.ndarray

    expected_shape = (len(simple_dataframe["a"]),)
    assert column.shape == expected_shape
示例9: test_transformed_names_binarizer
def test_transformed_names_binarizer(complex_dataframe):
    """
    `transformed_names_` lists one name per output column when the
    transformation turns a single column into several.
    """
    binarizer_spec = ('target', LabelBinarizer())
    mapper = DataFrameMapper([binarizer_spec])
    mapper.fit_transform(complex_dataframe)

    expected_names = ['target_a', 'target_b', 'target_c']
    assert mapper.transformed_names_ == expected_names
示例10: test_transformed_names_simple
def test_transformed_names_simple(simple_dataframe):
    """
    `transformed_names_` holds the plain column name for a column mapped
    with a ``None`` transformer.
    """
    passthrough_spec = ('a', None)
    mapper = DataFrameMapper([passthrough_spec])
    mapper.fit_transform(simple_dataframe)

    expected_names = ['a']
    assert mapper.transformed_names_ == expected_names
示例11: test_mapper
def test_mapper(self):
    """
    Fit a mapper that chains a CategoricalDomain with a LabelBinarizer on
    column "X" and check that the domain's ``data_`` equals
    ["1", "2", "3"] afterwards.
    """
    rows = [{"X" : "2", "y" : 2}, {"X" : "1"}, {"X" : "3"}]
    domain = CategoricalDomain()
    df = DataFrame(rows)

    x_steps = [domain, LabelBinarizer()]
    mapper = DataFrameMapper([("X", x_steps), ("y", None)])
    mapper.fit_transform(df)

    expected_values = numpy.array(["1", "2", "3"]).tolist()
    self.assertEqual(expected_values, domain.data_.tolist())
示例12: test_transformed_names_simple_alias
def test_transformed_names_simple_alias(simple_dataframe):
    """
    An alias given for a single-output column replaces the column name in
    `transformed_names_`.
    """
    alias_options = {'alias': 'new_name'}
    mapper = DataFrameMapper([('a', None, alias_options)])
    mapper.fit_transform(simple_dataframe)

    expected_names = ['new_name']
    assert mapper.transformed_names_ == expected_names
示例13: test_default_none_names
def test_default_none_names():
    """
    With an empty feature list and default=None, the reported output names
    are the dataframe's own column names.
    """
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [3, 5, 7]})
    mapper = DataFrameMapper([], default=None)
    mapper.fit_transform(frame)

    expected_names = ['a', 'b']
    assert mapper.transformed_names_ == expected_names
示例14: Transformer
class Transformer(object):
    """
    Converts a dataframe into a numpy-array-compatible representation
    using a DataFrameMapper built from the supplied configuration.
    """

    def __init__(self, config):
        """Store ``config``; the mapper is created later by ``prepare``."""
        self.__config = config
        self.__mapper = None
        encoder = LabelEncoderMissingValuesTransformer()
        self.__label_encoder_adapter = TransformerAdapter(encoder)

    def prepare(self, dataframe):
        """
        Split the (already cleaned) dataframe into train and test parts and
        return both in transformed form.

        When the data model has a target, the target is appended as the
        last column of each returned array.
        """
        mapping = DataFrameMapCreator().get_mapping_from_config(self.__config)
        self.__mapper = DataFrameMapper(mapping)

        train_percentage = self.__config.get_option_parameter(
            "split", "train_percentage")
        train, test = split_dataframe_train_test(dataframe, train_percentage)
        return self.__get_correct_return_parameters(train, test)

    def __get_correct_return_parameters(self, train, test):
        """
        Fit the mapper on ``train``, apply it to ``test`` and return the
        pair, with the target column appended to each when the data model
        has a target.
        """
        model = self.__config.get_data_model()
        train_transformed = self.__mapper.fit_transform(train)
        test_transformed = self.__mapper.transform(test)

        if not model.has_target():
            return train_transformed, test_transformed

        train_with_target = self.__add_target_data(train_transformed, train)
        test_with_target = self.__add_target_data(test_transformed, test)
        return train_with_target, test_with_target

    def __add_target_data(self, transformed_data, original_data):
        """
        Read the target feature from ``original_data`` and append it as an
        extra column to ``transformed_data``.

        A categorical target goes through the label-encoder adapter; any
        other target is cast to the data type returned for "float".
        ``original_data`` is indexed by the target feature's name.
        """
        target_feature = self.__config.get_data_model().find_target_feature()
        name = target_feature.get_name()

        if target_feature.is_categorical():
            target = self.__label_encoder_adapter.transform(original_data[name])
        else:
            raw_values = original_data[name].values
            target = raw_values.astype(type_name_to_data_type("float"))

        # NOTE(review): the source's indentation was lost; this trailing-axis
        # step is assumed to apply to both branches - confirm.
        target_column = target[..., None]
        return np.hstack((transformed_data, target_column))

    def apply(self, dataframe):
        """Transform ``dataframe`` with the mapper created by ``prepare``."""
        return self.__mapper.transform(dataframe)
示例15: test_fit_with_optional_y_arg
def test_fit_with_optional_y_arg(complex_dataframe):
    """
    A transformer whose fit method takes an optional y argument can be
    fitted through the mapper.
    """
    classifier_spec = (['feat1', 'feat2'], MockTClassifier())
    mapper = DataFrameMapper([classifier_spec])

    features = complex_dataframe[['feat1', 'feat2']]
    target = complex_dataframe['target']
    # The call is the whole check: it has to complete without raising.
    mapper.fit(features, target)