当前位置: 首页>>代码示例>>Python>>正文


Python sklearn_pandas.DataFrameMapper类代码示例

本文整理汇总了Python中sklearn_pandas.DataFrameMapper的典型用法代码示例。如果您正苦于以下问题:Python DataFrameMapper类的具体用法?Python DataFrameMapper怎么用?Python DataFrameMapper使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DataFrameMapper类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_nonexistent_columns_explicit_fail

def test_nonexistent_columns_explicit_fail(simple_dataframe):
    """
    Requesting a column the dataframe does not have must raise KeyError.
    """
    missing_columns = ["nonexistent_feature"]
    mapper = DataFrameMapper(None)
    with pytest.raises(KeyError):
        mapper._get_col_subset(simple_dataframe, missing_columns)
开发者ID:yonglehou,项目名称:sklearn-pandas,代码行数:7,代码来源:test_dataframe_mapper.py

示例2: preprocess_train

def preprocess_train(train):
    """
    Build the training feature matrix and extract the three target columns.

    The targets ('count', 'casual', 'registered') are read from ``train``
    before ``preprocess_data`` is called on it. A DataFrameMapper is then
    fitted on ``train``: 'season' and 'weather' each go through their own
    LabelBinarizer, every other listed column is passed through unchanged.

    Returns the tuple ``(train_X, train_y, train_y1, train_y2, mapper)``.
    """
    train_y, train_y1, train_y2 = [
        train[target] for target in ('count', 'casual', 'registered')
    ]

    preprocess_data(train)

    # Order matters: it fixes the column order of the mapper's output.
    feature_columns = (
        'hour', 'season', 'holiday', 'workingday', 'weather', 'temp',
        'atemp', 'humidity', 'windspeed', 'weekday', 'is_sunday',
        'bad_weather', 'year',
    )
    binarized_columns = ('season', 'weather')
    feature_spec = [
        (column, preprocessing.LabelBinarizer() if column in binarized_columns else None)
        for column in feature_columns
    ]
    mapper = DataFrameMapper(feature_spec)

    return mapper.fit_transform(train), train_y, train_y1, train_y2, mapper
开发者ID:adcaes,项目名称:kaggelCompetitions,代码行数:25,代码来源:predict.py

示例3: scale_vars

def scale_vars(df, mapper):
    """
    Scale columns of ``df`` in place with a DataFrameMapper and return it.

    When ``mapper`` is None, a new mapper applying a StandardScaler to each
    numeric column of ``df`` is built and fitted on ``df``; otherwise the
    supplied mapper is used as is. The mapper's output is written back
    into ``df`` under ``mapper.transformed_names_``.
    """
    # Silence sklearn's DataConversionWarning for the rest of the process.
    warnings.filterwarnings('ignore', category=sklearn.exceptions.DataConversionWarning)
    if mapper is None:
        numeric_columns = [col for col in df.columns if is_numeric_dtype(df[col])]
        scaler_spec = [([col], StandardScaler()) for col in numeric_columns]
        mapper = DataFrameMapper(scaler_spec).fit(df)
    scaled_values = mapper.transform(df)
    df[mapper.transformed_names_] = scaled_values
    return mapper
开发者ID:gil2abir,项目名称:fastai,代码行数:7,代码来源:structured.py

示例4: compute_cross_correlation_score

def compute_cross_correlation_score(df, clfs, preprocess_scaling=True, nFold=10):
    """
    Score every classifier on every fold of a stratified k-fold split.

    :param df: dataframe with a ``features`` column (vectorized through a
        DictVectorizer) and an ``expected_class`` column used as labels
    :param clfs: iterable of classifiers, each passed to ``generate_score``
    :param preprocess_scaling: forwarded to ``preprocess``
    :param nFold: number of stratified folds
    :return: tuple ``(scores, classification_results)``; each list holds one
        entry per (fold, classifier) pair, ordered fold by fold
    """

    to_sklearn_features = DataFrameMapper([('features', sklearn.feature_extraction.DictVectorizer())])

    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    skf = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)
    classification_results = []
    scores = []
    for num, (train_index, test_index) in enumerate(skf):
        X_train, X_test = data_X[train_index], data_X[test_index]
        # The fold indices are positional, so the label Series must be
        # indexed with .iloc; plain [] is label-based and selects the wrong
        # rows (or raises KeyError) when df does not have a default index.
        Y_train, Y_test = data_Y.iloc[train_index], data_Y.iloc[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))
        cross_valid_data = Cross_validation_split(X_train, X_test, Y_train, Y_test)
        cross_valid_data = preprocess(cross_valid_data, preprocess_scaling=preprocess_scaling, preprocess_correlation=False)

        for clf in clfs:
            score, classification = generate_score(clf, cross_valid_data, fold=num)
            scores.append(score)
            classification_results.append(classification)
    return scores, classification_results
开发者ID:arventwei,项目名称:protolab_sound_recognition,代码行数:30,代码来源:evaluate_learning.py

示例5: test_list_transformers_single_arg

def test_list_transformers_single_arg(simple_dataframe):
    """
    A list of transformers is accepted even when a transformer in it takes
    only a single X argument rather than the (X, y) pair.
    """
    single_arg_pipeline = [MockXTransformer()]
    mapper = DataFrameMapper([("a", single_arg_pipeline)])
    # Passing means no exception was raised.
    mapper.fit_transform(simple_dataframe)
开发者ID:yonglehou,项目名称:sklearn-pandas,代码行数:8,代码来源:test_dataframe_mapper.py

示例6: test_simple_df

def test_simple_df(simple_dataframe):
    """
    With df_out=True the mapper hands back a dataframe of the same length.
    """
    mapper = DataFrameMapper([('a', None)], df_out=True)
    result = mapper.fit_transform(simple_dataframe)
    assert type(result) == pd.DataFrame
    expected_rows = len(simple_dataframe["a"])
    assert len(result["a"]) == expected_rows
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例7: test_transformed_names_complex_alias

def test_transformed_names_complex_alias(complex_dataframe):
    """
    An alias given for a column that expands into several outputs is used
    as the prefix of every output name.
    """
    feature_spec = ('target', LabelBinarizer(), {'alias': 'new'})
    mapper = DataFrameMapper([feature_spec])
    mapper.fit_transform(complex_dataframe)
    expected_names = ['new_a', 'new_b', 'new_c']
    assert mapper.transformed_names_ == expected_names
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例8: test_get_col_subset_single_column_array

def test_get_col_subset_single_column_array(simple_dataframe):
    """
    A single column selected by name comes back as a 1-dimensional
    numpy array.
    """
    column = DataFrameMapper(None)._get_col_subset(simple_dataframe, "a")

    expected_shape = (len(simple_dataframe["a"]),)
    assert type(column) == np.ndarray
    assert column.shape == expected_shape
开发者ID:yonglehou,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例9: test_transformed_names_binarizer

def test_transformed_names_binarizer(complex_dataframe):
    """
    `transformed_names_` lists one name per output column when the
    transformation expands a single input column into several.
    """
    mapper = DataFrameMapper([('target', LabelBinarizer())])
    mapper.fit_transform(complex_dataframe)
    expected_names = ['target_a', 'target_b', 'target_c']
    assert mapper.transformed_names_ == expected_names
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例10: test_transformed_names_simple

def test_transformed_names_simple(simple_dataframe):
    """
    For a pass-through column, `transformed_names_` is just the column name.
    """
    passthrough_spec = [('a', None)]
    mapper = DataFrameMapper(passthrough_spec)
    mapper.fit_transform(simple_dataframe)
    assert mapper.transformed_names_ == ['a']
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例11: test_mapper

	def test_mapper(self):
		"""
		Fitting a mapper that chains a CategoricalDomain with a
		LabelBinarizer leaves the category values in ``domain.data_``.
		"""
		records = [{"X" : "2", "y" : 2}, {"X" : "1"}, {"X" : "3"}]
		frame = DataFrame(records)
		domain = CategoricalDomain()
		feature_spec = [
			("X", [domain, LabelBinarizer()]),
			("y", None)
		]
		DataFrameMapper(feature_spec).fit_transform(frame)
		expected_categories = numpy.array(["1", "2", "3"]).tolist()
		self.assertEqual(expected_categories, domain.data_.tolist())
开发者ID:jpmml,项目名称:sklearn2pmml,代码行数:9,代码来源:__init__.py

示例12: test_transformed_names_simple_alias

def test_transformed_names_simple_alias(simple_dataframe):
    """
    An alias given for a single-output column replaces the column name in
    the output names.
    """
    aliased_spec = ('a', None, {'alias': 'new_name'})
    mapper = DataFrameMapper([aliased_spec])
    mapper.fit_transform(simple_dataframe)
    assert mapper.transformed_names_ == ['new_name']
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例13: test_default_none_names

def test_default_none_names():
    """
    With default=None and no explicit features, the original column names
    come through unchanged.
    """
    columns = {'a': [1, 2, 3], 'b': [3, 5, 7]}
    frame = pd.DataFrame(columns)
    mapper = DataFrameMapper([], default=None)

    mapper.fit_transform(frame)
    expected_names = ['a', 'b']
    assert mapper.transformed_names_ == expected_names
开发者ID:paulgb,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py

示例14: Transformer

class Transformer(object):
    """
    Converts a dataframe into a numpy-array-compatible format by means of
    a DataFrameMapper built from the supplied configuration.
    """

    def __init__(self, config):
        self.__config = config
        # The mapper does not exist until prepare() has been called.
        self.__mapper = None
        self.__label_encoder_adapter = TransformerAdapter(LabelEncoderMissingValuesTransformer())

    def prepare(self, dataframe):
        """
        Build the mapper from the configuration, split the (already
        cleaned) dataframe into train and test parts and return both parts
        transformed.
        For a supervised problem the target is appended as the last column
        of each returned array.
        """
        self.__mapper = DataFrameMapper(
            DataFrameMapCreator().get_mapping_from_config(self.__config)
        )
        train_percentage = self.__config.get_option_parameter("split", "train_percentage")
        train, test = split_dataframe_train_test(dataframe, train_percentage)
        return self.__get_correct_return_parameters(train, test)

    def __get_correct_return_parameters(self, train, test):
        """
        Fit the mapper on ``train``, transform both parts, and append the
        target column to each when the data model declares a target.
        """
        model = self.__config.get_data_model()

        # The mapper is fitted on the train part only.
        train_transformed = self.__mapper.fit_transform(train)
        test_transformed = self.__mapper.transform(test)

        if not model.has_target():
            return train_transformed, test_transformed

        train_with_target = self.__add_target_data(train_transformed, train)
        test_with_target = self.__add_target_data(test_transformed, test)
        return train_with_target, test_with_target

    def __add_target_data(self, transformed_data, original_data):
        """
        Look up the target column by name in ``original_data`` and append
        it as the last column of ``transformed_data``.

        A categorical target goes through the label encoder adapter; any
        other target is cast to the type returned by
        ``type_name_to_data_type("float")``.
        NOTE(review): ``original_data`` is indexed by column name, so it is
        presumably the untransformed dataframe rather than an array - confirm.
        """
        feature = self.__config.get_data_model().find_target_feature()
        column = feature.get_name()

        if feature.is_categorical():
            target = self.__label_encoder_adapter.transform(original_data[column])
        else:
            raw_values = original_data[column].values
            target = raw_values.astype(type_name_to_data_type("float"))

        # Add a trailing axis so the target can be stacked as a column.
        target_column = target[..., np.newaxis]
        return np.hstack((transformed_data, target_column))

    def apply(self, dataframe):
        """Transform ``dataframe`` with the mapper created by prepare()."""
        return self.__mapper.transform(dataframe)
开发者ID:GMadorell,项目名称:abris,代码行数:57,代码来源:transformer.py

示例15: test_fit_with_optional_y_arg

def test_fit_with_optional_y_arg(complex_dataframe):
    """
    A transformer whose fit method takes an optional y argument can be
    fitted through the mapper.
    """
    feature_columns = ['feat1', 'feat2']
    mapper = DataFrameMapper([(['feat1', 'feat2'], MockTClassifier())])
    # Passing means no exception was raised.
    mapper.fit(complex_dataframe[feature_columns], complex_dataframe['target'])
开发者ID:shniu,项目名称:sklearn-pandas,代码行数:9,代码来源:test_dataframe_mapper.py


注:本文中的sklearn_pandas.DataFrameMapper类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。