当前位置: 首页>>代码示例>>Python>>正文


Python StandardScaler.fit_transform方法代码示例

本文整理汇总了Python中sklearn.preprocessing.data.StandardScaler.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python StandardScaler.fit_transform方法的具体用法?Python StandardScaler.fit_transform怎么用?Python StandardScaler.fit_transform使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing.data.StandardScaler的用法示例。


在下文中一共展示了StandardScaler.fit_transform方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read_file

# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def read_file():
    file_content = pd.read_csv('train.csv')
    exc_cols = [u'Id', u'Response']
    cols = [c for c in file_content.columns if c not in exc_cols]
    train_datas = file_content.ix[:, cols]
    train_lables = file_content['Response'].values
    
    test_file = pd.read_csv('test.csv')
    test_ids = test_file['Id'].values
    test_datas = test_file.ix[:, [c for c in test_file.columns if c not in [u'Id']]]
    
    # 填充平均值
    test_datas = test_datas.fillna(-1)
    train_datas = train_datas.fillna(-1)
    all_datas = pd.concat([train_datas, test_datas], axis=0) 
    
    # 对数据进行一下划分
    categoricalVariables = ["Product_Info_1", "Product_Info_2", "Product_Info_3", "Product_Info_5", "Product_Info_6", "Product_Info_7", "Employment_Info_2", "Employment_Info_3", "Employment_Info_5", "InsuredInfo_1", "InsuredInfo_2", "InsuredInfo_3", "InsuredInfo_4", "InsuredInfo_5", "InsuredInfo_6", "InsuredInfo_7", "Insurance_History_1", "Insurance_History_2", "Insurance_History_3", "Insurance_History_4", "Insurance_History_7", "Insurance_History_8", "Insurance_History_9", "Family_Hist_1", "Medical_History_2", "Medical_History_3", "Medical_History_4", "Medical_History_5", "Medical_History_6", "Medical_History_7", "Medical_History_8", "Medical_History_9", "Medical_History_10", "Medical_History_11", "Medical_History_12", "Medical_History_13", "Medical_History_14", "Medical_History_16", "Medical_History_17", "Medical_History_18", "Medical_History_19", "Medical_History_20", "Medical_History_21", "Medical_History_22", "Medical_History_23", "Medical_History_25", "Medical_History_26", "Medical_History_27", "Medical_History_28", "Medical_History_29", "Medical_History_30", "Medical_History_31", "Medical_History_33", "Medical_History_34", "Medical_History_35", "Medical_History_36", "Medical_History_37", "Medical_History_38", "Medical_History_39", "Medical_History_40", "Medical_History_41"]
    all_file_data = all_datas.ix[:, [c for c in all_datas.columns if c not in categoricalVariables]]
    all_file_cate = all_datas.ix[:, [c for c in categoricalVariables]]
 
    # 归一化 对数值数据
    scalar_this = StandardScaler()
    scalar_this.fit_transform(all_file_data)
    
    # 重新组合数据
    train_datas = pd.concat([all_file_data[:train_datas.shape[0]], all_file_cate[:train_datas.shape[0]]], axis=1)
    test_datas = pd.concat([all_file_data[file_content.shape[0]:], all_file_cate[file_content.shape[0]:]], axis=1)
    
    # 向量化
    train_datas = DictVectorizer().fit_transform(train_datas.to_dict(outtype='records')).toarray()
    test_datas = DictVectorizer().fit_transform(test_datas.to_dict(outtype='records')).toarray()
    
    return (train_datas, train_lables, test_ids, test_datas)
开发者ID:xuerenlv,项目名称:PaperWork,代码行数:36,代码来源:kaggle_homework.py

示例2: test_scaler_without_centering

# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scaler_without_centering():
    rng = np.random.RandomState(42)
    X = rng.randn(4, 5)
    X[:, 0] = 0.0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    assert_raises(ValueError, StandardScaler().fit, X_csr)

    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    scaler = StandardScaler(with_mean=False).fit(X)
    X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
    X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
    X_csc_scaled = scaler_csr.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0), [0., -0.01, 2.24, -0.35, -0.78], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    X_csc_scaled_back = scaler_csr.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:61,代码来源:test_data.py

示例3: test_scalar

# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scalar():
    from sklearn.preprocessing.data import MinMaxScaler, StandardScaler
    scalar = StandardScaler()
    
    training = pd.read_csv(TRAIN_FEATURES_CSV, nrows=200000)
    test = pd.read_csv(TEST_FEATURES_CSV)
    
    # normalize the values
    for column in TOTAL_TRAINING_FEATURE_COLUMNS:
        training[column] = scalar.fit_transform(training[column])
        test[column] = scalar.transform(test[column])
开发者ID:testing32,项目名称:bimbo,代码行数:13,代码来源:analyze_data.py

示例4: test_scaler_int

# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
def test_scaler_int():
    # test that scaler converts integer input to floating
    # for both sparse and dense matrices
    rng = np.random.RandomState(42)
    X = rng.randint(20, size=(4, 5))
    X[:, 0] = 0  # first feature is always of zero
    X_csr = sparse.csr_matrix(X)
    X_csc = sparse.csc_matrix(X)

    null_transform = StandardScaler(with_mean=False, with_std=False, copy=True)
    with warnings.catch_warnings(record=True):
        X_null = null_transform.fit_transform(X_csr)
    assert_array_equal(X_null.data, X_csr.data)
    X_orig = null_transform.inverse_transform(X_null)
    assert_array_equal(X_orig.data, X_csr.data)

    with warnings.catch_warnings(record=True):
        scaler = StandardScaler(with_mean=False).fit(X)
        X_scaled = scaler.transform(X, copy=True)
    assert_false(np.any(np.isnan(X_scaled)))

    with warnings.catch_warnings(record=True):
        scaler_csr = StandardScaler(with_mean=False).fit(X_csr)
        X_csr_scaled = scaler_csr.transform(X_csr, copy=True)
    assert_false(np.any(np.isnan(X_csr_scaled.data)))

    with warnings.catch_warnings(record=True):
        scaler_csc = StandardScaler(with_mean=False).fit(X_csc)
        X_csc_scaled = scaler_csr.transform(X_csc, copy=True)
    assert_false(np.any(np.isnan(X_csc_scaled.data)))

    assert_equal(scaler.mean_, scaler_csr.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csr.std_)

    assert_equal(scaler.mean_, scaler_csc.mean_)
    assert_array_almost_equal(scaler.std_, scaler_csc.std_)

    assert_array_almost_equal(
        X_scaled.mean(axis=0),
        [0., 1.109, 1.856, 21., 1.559], 2)
    assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.])

    X_csr_scaled_mean, X_csr_scaled_std = mean_variance_axis0(
        X_csr_scaled.astype(np.float))
    assert_array_almost_equal(X_csr_scaled_mean, X_scaled.mean(axis=0))
    assert_array_almost_equal(X_csr_scaled_std, X_scaled.std(axis=0))

    # Check that X has not been modified (copy)
    assert_true(X_scaled is not X)
    assert_true(X_csr_scaled is not X_csr)

    X_scaled_back = scaler.inverse_transform(X_scaled)
    assert_true(X_scaled_back is not X)
    assert_true(X_scaled_back is not X_scaled)
    assert_array_almost_equal(X_scaled_back, X)

    X_csr_scaled_back = scaler_csr.inverse_transform(X_csr_scaled)
    assert_true(X_csr_scaled_back is not X_csr)
    assert_true(X_csr_scaled_back is not X_csr_scaled)
    assert_array_almost_equal(X_csr_scaled_back.toarray(), X)

    X_csc_scaled_back = scaler_csr.inverse_transform(X_csc_scaled.tocsc())
    assert_true(X_csc_scaled_back is not X_csc)
    assert_true(X_csc_scaled_back is not X_csc_scaled)
    assert_array_almost_equal(X_csc_scaled_back.toarray(), X)
开发者ID:CodeGenerator,项目名称:scikit-learn,代码行数:67,代码来源:test_data.py

示例5: SkRanker

# 需要导入模块: from sklearn.preprocessing.data import StandardScaler [as 别名]
# 或者: from sklearn.preprocessing.data.StandardScaler import fit_transform [as 别名]
class SkRanker(Ranker, SkLearner):
    '''
    Basic ranker wrapping scikit-learn functions
    '''
    
    def train(self, dataset_filename, 
              scale=True, 
              feature_selector=None, 
              feature_selection_params={},
              feature_selection_threshold=.25, 
              learning_params={}, 
              optimize=True, 
              optimization_params={}, 
              scorers=['f1_score'],
              attribute_set=None,
              class_name=None,
              metaresults_prefix="./0-",
              **kwargs):
        
        plot_filename = "{}{}".format(metaresults_prefix, "featureselection.pdf")
        data, labels = dataset_to_instances(dataset_filename, attribute_set, class_name,  **kwargs)
        learner = self.learner
        
        #the class must remember the attribute_set and the class_name in order to reproduce the vectors
        self.attribute_set = attribute_set
        self.class_name = class_name

 
        #scale data to the mean
        if scale:
            log.info("Scaling datasets...")
            log.debug("Data shape before scaling: {}".format(data.shape))
            self.scaler = StandardScaler()
            data = self.scaler.fit_transform(data)
            log.debug("Data shape after scaling: {}".format(data.shape))
            log.debug("Mean: {} , Std: {}".format(self.scaler.mean_, self.scaler.std_))

        #avoid any NaNs and Infs that may have occurred due to the scaling
        data = np.nan_to_num(data)
        
        #feature selection
        if isinstance(feature_selection_params, basestring):
            feature_selection_params = eval(feature_selection_params)
        self.featureselector, data, metadata = self.run_feature_selection(data, labels, feature_selector, feature_selection_params, feature_selection_threshold, plot_filename) 
        
        #initialize learning method and scoring functions and optimize
        self.learner, self.scorers = self.initialize_learning_method(learner, data, labels, learning_params, optimize, optimization_params, scorers)

        log.info("Data shape before fitting: {}".format(data.shape))

        self.learner.fit(data, labels)
        self.fit = True
        return metadata
    
    def get_model_description(self):
        params = {}
        
        if self.scaler:
            params = self.scaler.get_params(deep=True)
        try: #these are for SVC
            if self.learner.kernel == "rbf":
                params["gamma"] = self.learner.gamma
                params["C"] = self.learner.C
                for i, n_support in enumerate(self.learner.n_support_):
                    params["n_{}".format(i)] = n_support
                log.debug(len(self.learner.dual_coef_))
                return params
            elif self.learner.kernel == "linear":
                coefficients = self.learner.coef_
                att_coefficients = {}
                for attname, coeff in zip(self.attribute_set.get_names_pairwise(), coefficients[0]):
                    att_coefficients[attname] = coeff
                return att_coefficients
        except AttributeError:
            pass
        try: #adaboost etc
            params = self.learner.get_params()
            numeric_params = OrderedDict()
            for key, value in params.iteritems():
                try:
                    value = float(value)
                except ValueError:
                    continue
                numeric_params[key] = value
            return numeric_params
        except:
            pass
        return {}
    
    
    def get_ranked_sentence(self, parallelsentence, critical_attribute="rank_predicted", 
                            new_rank_name="rank_hard", 
                            del_orig_class_att=False, 
                            bidirectional_pairs=False, 
                            ties=True,
                            reconstruct='hard'):
        """
        """
        if type(self.learner) == str:
            if self.classifier:
#.........这里部分代码省略.........
开发者ID:lefterav,项目名称:qualitative,代码行数:103,代码来源:ranking.py


注:本文中的sklearn.preprocessing.data.StandardScaler.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。