

Python StandardScaler.fit_transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.StandardScaler.fit_transform. If you are trying to work out what StandardScaler.fit_transform does, how to call it, or how it is used in real code, the curated examples below should help. You can also explore further usage examples of the class it belongs to, sklearn.preprocessing.StandardScaler.


The following shows 15 code examples of the StandardScaler.fit_transform method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
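Before working through the examples, the canonical pattern is worth keeping in mind: fit_transform learns the per-feature mean and standard deviation from the training data and standardizes that data in one step, while transform reuses the learned statistics on new data. A minimal sketch (the array values are purely illustrative):

import numpy as np
from sklearn.preprocessing import StandardScaler

X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
X_test = np.array([[1.5, 15.0]])

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # fit on the training data, then transform it
X_test_std = scaler.transform(X_test)        # reuse the training mean/std on new data

print(scaler.mean_)   # per-feature mean learned from X_train
print(scaler.scale_)  # per-feature standard deviation learned from X_train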

Example 1: logisticRegression

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def logisticRegression():
    data = loadtxtAndcsv_data("data1.txt", ",", np.float64)
    X = data[:,0:-1]
    y = data[:,-1]

    # Split into training and test sets
    x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

    # Standardize the features
    scaler = StandardScaler()
    # scaler.fit(x_train)
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)   # reuse the training-set statistics; do not refit on the test set

    # Logistic regression
    model = LogisticRegression()
    model.fit(x_train,y_train)

    # Prediction
    predict = model.predict(x_test)
    right = sum(predict == y_test)

    predict = np.hstack((predict.reshape(-1,1),y_test.reshape(-1,1)))   # put predictions and true labels side by side for easier inspection
    print(predict)
    print('Test set accuracy: %f%%'%(right*100.0/predict.shape[0]))     # accuracy on the test set
Developer ID: suyuann, Project: MachineLearning_Python, Lines of code: 27, Source file: LogisticRegression_scikit-learn.py
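A common way to guarantee that the test set is only transformed (never re-fit) is to chain the scaler and the classifier in a Pipeline, which calls fit_transform on the training data and transform on anything passed to predict or score. A minimal sketch reusing the train/test split from the example above (before the manual scaling step); this is not part of the original project:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# fit() runs StandardScaler.fit_transform on x_train internally;
# score() runs StandardScaler.transform on x_test before predicting
pipe = make_pipeline(StandardScaler(), LogisticRegression())
pipe.fit(x_train, y_train)
print('Test set accuracy: %f%%' % (100.0 * pipe.score(x_test, y_test)))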

Example 2: test_same_fit_transform

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
    def test_same_fit_transform(self):
        X, X_rdd = self.make_dense_rdd()

        local = StandardScaler()
        dist = SparkStandardScaler()

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)

        local = StandardScaler(with_mean=False)
        dist = SparkStandardScaler(with_mean=False)

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)

        local = StandardScaler(with_std=False)
        dist = SparkStandardScaler(with_std=False)

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)
Developer ID: lensacom, Project: sparkit-learn, Lines of code: 34, Source file: test_data.py

Example 3: _transform_data

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def _transform_data():
    from solaris.run import load_data
    from solaris.models import LocalModel

    data = load_data()
    X = data['X_train']
    y = data['y_train']

    # no shuffle - past-future split
    offset = int(X.shape[0] * 0.5)  # integer index for the past/future split
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]

    print('_' * 80)
    print('transforming data')
    print()
    tf = LocalModel(None)
    print('transforming train')
    X_train, y_train = tf.transform(X_train, y_train)
    print('transforming test')
    X_test, y_test = tf.transform(X_test, y_test)
    print('fin')

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    scaler = StandardScaler()
    y_train = scaler.fit_transform(y_train)
    y_test = scaler.transform(y_test)

    data = {'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test}
    joblib.dump(data, 'data/dbndata.pkl')
Developer ID: mhdella, Project: kaggle-solar-energy, Lines of code: 36, Source file: dbn_eval.py

Example 4: generate_dataset

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
    """Generate a regression dataset with the given parameters."""
    if verbose:
        print("generating dataset...")

    X, y, coef = make_regression(n_samples=n_train + n_test,
                                 n_features=n_features, noise=noise, coef=True)

    random_seed = 13
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_train, random_state=random_seed)
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    y_scaler = StandardScaler()
    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
    y_test = y_scaler.transform(y_test[:, None])[:, 0]

    gc.collect()
    if verbose:
        print("ok")
    return X_train, y_train, X_test, y_test
Developer ID: , Project: , Lines of code: 27, Source file: 

Example 5: remove_outliers

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def remove_outliers(image, mask):
    # Apply the mask to the image, keeping only the region that may contain a bee
    im = cv2.bitwise_and(image, image, mask=mask)
    ldp_image, _, _ = ldp.ldp(im)
    test_Y = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    test_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    test = np.concatenate((test_rgb, test_Y), axis=1)  # keep the same (rgb, ldp) column order as the training data below
    # Invert the mask to collect background pixels for training the one-class SVM
    mask_not = cv2.bitwise_not(mask)
    ret1, mask_not = cv2.threshold(mask_not, np.mean(mask_not), 255, cv2.THRESH_BINARY)
    im = cv2.bitwise_and(image, image, mask=mask_not)
    ldp_image, _, _ = ldp.ldp(im)
    data_ldp = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    data_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    data = np.concatenate((data_rgb, data_ldp), axis=1)
    data = data[np.any(data != 0, axis=1)]
    print(data.shape)
    data = data.astype('float64')
    data = preprocessing.normalize(data, axis=0)
    ss = StandardScaler()
    data = ss.fit_transform(data)
    clf = svm.OneClassSVM(nu=0.8, kernel="rbf", gamma=0.1)
    clf.fit(data)
    test = test.astype('float64')
    test = preprocessing.normalize(test, axis=0)
    print(test.shape)
    test = ss.transform(test)  # reuse the statistics fitted above instead of refitting on the test pixels
    test = clf.predict(test)
    test = test.reshape((image.shape[0], image.shape[1]))
    test[test == -1] = 0
    test[test == 1] = 255
    test = test.astype('uint8')
    im = cv2.bitwise_and(image, image, mask=test)
    im = cv2.bitwise_and(im, im, mask=mask)
    # print(test[:, 0], test[:, 1])
    return (im, test)
Developer ID: sai19, Project: Bee_image_classification, Lines of code: 37, Source file: bee_preprocess.py

Example 6: DBScan_Flux

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def DBScan_Flux(phots, ycenters, xcenters, dbsClean=0, useTheForce=False):
    """Cluster the photometry with DBSCAN and flag the points in the clean cluster.

    The y-centers, x-centers and fluxes are each standardized with
    StandardScaler.fit_transform, stacked into a feature matrix and clustered
    with DBSCAN; non-finite fluxes are replaced by the median of the finite
    values beforehand.

    Args:
        phots: array of photometric fluxes.
        ycenters: array of y centroid positions.
        xcenters: array of x centroid positions.
        dbsClean: cluster label treated as the clean cluster (default 0).
        useTheForce: unused; kept for interface compatibility.

    Returns:
        Boolean array, True where a point falls in the `dbsClean` cluster.

    """

    dbsPhots    = DBSCAN()  # n_jobs=-1
    stdScaler   = StandardScaler()
    
    phots       = np.copy(phots.ravel())
    phots[~np.isfinite(phots)] = np.median(phots[np.isfinite(phots)])
    
    featuresNow = np.transpose([stdScaler.fit_transform(ycenters[:,None]).ravel(), \
                                stdScaler.fit_transform(xcenters[:,None]).ravel(), \
                                stdScaler.fit_transform(phots[:,None]).ravel()   ] )
    
    # print(featuresNow.shape)
    dbsPhotsPred= dbsPhots.fit_predict(featuresNow)
    
    return dbsPhotsPred == dbsClean
Developer ID: exowanderer, Project: ExoplanetTSO, Lines of code: 31, Source file: bak_auxiliary.py

Example 7: batchScaling

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def batchScaling(in_root="raw", out_root="data", with_mean=True, with_std=True):

    Xy_files = filter(lambda x:x.endswith(".Xy.npz"), os.listdir(in_root))
    # Xy_files = ["image_rgb_gist.Xy.npz"]

    for Xy_file in Xy_files:

        in_path = os.path.join( in_root, Xy_file )
        out_path = os.path.join( out_root, Xy_file )

        print('> load %s' % in_path)

        data = np.load( in_path )

        ## detect sparse or dense
        _sparse = len(data['X'].shape) == 0

        print('> scaling')
        if _sparse:
            ## Cannot center sparse matrices: pass `with_mean=False` instead.
            print('>> Sparse matrix detected. Use with_mean=False')
            scaler = StandardScaler(with_mean=False, with_std=with_std)
            X = scaler.fit_transform( data['X'].all() )
        else:
            scaler = StandardScaler(with_mean=with_mean, with_std=with_std)
            X = scaler.fit_transform( data['X'] )

        print('> compressing and dumping to %s' % out_path)
        np.savez_compressed(out_path, X=X, y=data['y'])

        print('=' * 50)
Developer ID: AcademiaSinicaNLPLab, Project: LJ40K, Lines of code: 34, Source file: batchScaling.py
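Example 7 relies on the fact that sparse input cannot be mean-centered without densifying it, which is why it falls back to with_mean=False. A small sketch of the same idea on a SciPy CSR matrix (the data here is made up):

import numpy as np
from scipy import sparse
from sklearn.preprocessing import StandardScaler

X_sparse = sparse.csr_matrix(np.array([[0., 1.], [2., 0.], [0., 3.]]))

# Centering would destroy sparsity, so only divide by the standard deviation
scaler = StandardScaler(with_mean=False)
X_scaled = scaler.fit_transform(X_sparse)  # the result stays sparse (CSR)
print(X_scaled.toarray())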

Example 8: TrainValidSplitter

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class TrainValidSplitter(object):
    def __init__(self, standardize=True, few=False):
        self.standardize = standardize
        self.few = few
        self.standa = None

    def __call__(self, X, y, net):
        strati = StratifiedShuffleSplit(y = y, n_iter = 1, test_size = 0.2, random_state = 1234)
        
        train_indices, valid_indices = next(iter(strati))
        
        if self.standardize:
            self.standa = StandardScaler()
            if self.few:
                X_train = np.hstack((self.standa.fit_transform(X[train_indices,:23]), X[train_indices,23:]))
                X_valid = np.hstack((self.standa.transform(X[valid_indices,:23]), X[valid_indices,23:]))
            else:
                X_train = self.standa.fit_transform(X[train_indices])
                X_valid = self.standa.transform(X[valid_indices])
        else:
            X_train, X_valid = X[train_indices], X[valid_indices]
        
        y_train, y_valid = y[train_indices], y[valid_indices]
        
        return X_train, X_valid, y_train, y_valid
Developer ID: matchado, Project: WalmartTripType, Lines of code: 27, Source file: train_and_predict.py

Example 9: LinearXGB

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class LinearXGB(ClippedMixin):
    trained = set()
    cache = {}

    def __init__(self, params, num_rounds):
        self.params = params
        self.scaler = StandardScaler(with_mean=False)
        self.num_rounds = num_rounds

    def fit(self, dense, svd, sparse, y):
        X_train = np.hstack((dense, svd))
        #X_train = hstack((X_train, sparse))
        train_hash = hash(str(X_train))
        if train_hash not in self.trained:
            X_scaled = self.scaler.fit_transform(X_train)
            X_scaled = normalize(X_scaled)
            dtrain = xgb.DMatrix(X_scaled, label=y)
            watchlist = [(dtrain, 'train')]
            self.bst = xgb.train(self.params, dtrain, self.num_rounds)#, watchlist)
            self.trained.add(train_hash)

    def predict(self, dense, svd, sparse):
        X_test = np.hstack((dense, svd))
        #X_test = hstack((X_test, sparse))
        test_hash = hash(str(X_test))
        if test_hash not in self.cache:
            #X_scaled = X_test
            X_scaled = self.scaler.transform(X_test)  # apply the statistics learned during fit(); do not refit on test data
            X_scaled = normalize(X_scaled)
            dtest = xgb.DMatrix(X_scaled)
            #dtest = xgb.DMatrix(X_test)
            y_pred = self.bst.predict(dtest)
            self.cache[test_hash] = y_pred
        return self.cache[test_hash]
Developer ID: ephes, Project: kaggle_homedepot, Lines of code: 36, Source file: models.py

Example 10: prep_X_y

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def prep_X_y(df, constant=False, split=True):
    cols_to_exclude = ['venue_state', 'venue_name', 'venue_country', 'venue_address', 'ticket_types', 'email_domain', 'description', 'previous_payouts', 'payee_name', 'org_name', 'org_desc', 'object_id', 'name', 'acct_type', 'country', 'listed', 'currency', 'payout_type', 'channels']

    if constant:
        df['const'] = 1

    X = df.drop(cols_to_exclude + ['fraud'], axis=1).values
    y = df['fraud'].values

    print('columns used:\n', df.drop(cols_to_exclude + ['fraud'], axis=1).columns)

    if split:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)  # scale the test set with the training-set statistics

        X_smoted, y_smoted = smote(X_train, y_train, target=.5)
        return X_smoted, X_test, y_smoted, y_test
    else:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_smoted, y_smoted = smote(X, y, target=.5)
        return X_smoted, y_smoted
Developer ID: a-knit, Project: fraud_detector, Lines of code: 27, Source file: pair_2.py

Example 11: data_fr

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def data_fr(novel_num):
    #if csv_file(novel, novel_num) is True:
    nn = str(novel_num)
    df_novel = pd.read_csv('novel_'+nn+'list_1.csv', header=None)
    try: 
        df_novel['wrd_length'] = df_novel[0].apply(wrd_lengths)
        df_novel['total_char'] = [sum(l) for l in df_novel['wrd_length']]
        df_novel['syl_count'] = df_novel[0].apply(syl_count)
        df_novel['syl_sum'] = [sum(l) for l in df_novel['syl_count']]
        df_novel['sentiment'] = df_novel[0].apply(detect_sentiment)
        #create csv for word to syl to improve syl function
        d = {}
        for l in df_novel[0]:
            sent = TextBlob(l)
            for x in sent.words:
                w = CountSyllables(x)
                d[x] = w
        with open('novel_'+nn+'list_1_syl.csv', 'w', newline='') as f:
            writer = csv.writer(f)
            for row in d.items():
                writer.writerow(row)
        #create cluster columns
        df_cluster = df_novel.drop('wrd_length', 1)
        df_cluster = df_cluster.drop('syl_count', 1)
        X = df_cluster.drop(0, axis = 1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=20, random_state=1)
        km.fit(X_scaled)
        df_cluster_20 = df_cluster.copy()
        df_cluster_20['cluster'] = km.labels_
        df_novel['cluster_20'] = df_cluster_20['cluster']
        #Create cluster 3
        df_cluster_3 = df_cluster.copy()
        X = df_cluster_3.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters = 3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3['cluster'] = km.labels_
        df_novel['cluster_3_syl'] = df_cluster_3['cluster']
        #create cluster 3 no syl
        df_cluster_3no_syl = df_cluster.copy()
        X = df_cluster_3no_syl.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3no_syl['cluster'] = km.labels_
        df_novel['cluster_3no_syl'] = df_cluster_3no_syl['cluster']
        #Create 5 clusters
        df_cluster_5 = df_cluster.copy()
        X = df_cluster_5.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=5, random_state=1)
        km.fit(X_scaled)
        df_cluster_5['cluster'] = km.labels_
        df_novel['cluster_5'] = df_cluster_5['cluster']
        df_novel.to_csv('novel_'+nn+'list_1.csv', index=False)
    except:
        rejects_3.append(novel_num)
Developer ID: ravenruckus, Project: novel_processing, Lines of code: 61, Source file: novel_processing.py

Example 12: train_test

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
    def train_test(self, X, y, X_test):
        """Two-stage stacking: an XGBoost model and a calibrated random forest are
        trained on one half of the data; their predicted probabilities, rescaled
        with StandardScaler.fit_transform, are appended as extra features to the
        other half, on which a second XGBoost model is trained. Returns the
        predicted class probabilities for X_test.
        """
        sss = StratifiedShuffleSplit(y, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0, X1 = X[train_id], X[valid_id]
            y0, y1 = y[train_id], y[valid_id]  
            
        #First half
        
        w0 = np.zeros(len(y0))
        for i in range(len(w0)):
            w0[i] = self.w[int(y0[i])]
        xg0_train = DMatrix(X0, label=y0, weight=w0)  
        xg0_test = DMatrix(X1, label=y1)   
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)
        
        #Second half
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))  
        w1 = np.zeros(len(y1))
        
#        self.param['eta'] = 0.01
        self.num_round = 450

        for i in range(len(w1)):
            w1[i] = self.w[int(y1[i])]
        xg1_train = DMatrix(X1, label=y1, weight=w1)    
        xg_test= DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
        
        return y_pred






                    
        
Developer ID: chrinide, Project: kaggle_otto_group, Lines of code: 51, Source file: clf_xgboost_split.py

Example 13: correlation_matching

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """ Learns correlation matching (CM) over I_tr and T_tr
        and applies it to I_tr, T_tr, I_te, T_te
        
        
        Parameters
        ----------
        
        I_tr: np.ndarray [shape=(n_tr, d_I)]
            image data matrix for training
        
        T_tr: np.ndarray [shape=(n_tr, d_T)]
            text data matrix for training
        
        I_te: np.ndarray [shape=(n_te, d_I)]
            image data matrix for testing
        
        T_te: np.ndarray [shape=(n_te, d_T)]
            text data matrix for testing
        
        n_comps: int > 0 [scalar]
            number of canonical components to use
            
        Returns
        -------
        
        I_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            image data matrix represented in correlation space
        
        T_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            text data matrix represented in correlation space
        
        I_te_cca : np.ndarray [shape=(n_te, n_comps)]
            image data matrix represented in correlation space
        
        T_te_cca : np.ndarray [shape=(n_te, n_comps)]
            text data matrix represented in correlation space
        
        """


    # scale the image and text data
    I_scaler = StandardScaler()
    I_tr = I_scaler.fit_transform(I_tr)
    I_te = I_scaler.transform(I_te)

    T_scaler = StandardScaler()
    T_tr = T_scaler.fit_transform(T_tr)
    T_te = T_scaler.transform(T_te)

    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)

    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
Developer ID: emanuetre, Project: crossmodal, Lines of code: 59, Source file: correlation_matching.py

Example 14: stack_features

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def stack_features(params):

    """
    Get local features for all training images together
    """

    # Init detector and extractor
    detector, extractor = init_detect_extract(params)

    # Read image names
    with open(
        os.path.join(params["root"], params["root_save"], params["image_lists"], params["split"] + ".txt"), "r"
    ) as f:
        image_list = f.readlines()

    X = []
    for image_name in image_list:

        # Read image
        im = cv2.imread(
            os.path.join(params["root"], params["database"], params["split"], "images", image_name.rstrip())
        )

        # Resize image
        im = resize_image(params, im)

        feats = image_local_features(im, detector, extractor)
        # Stack all local descriptors together

        if feats is not None:
            if len(X) == 0:

                X = feats
            else:
                X = np.vstack((X, feats))

    if params["normalize_feats"]:
        X = normalize(X)

    if params["whiten"]:

        # Fit a whitening PCA on the stacked descriptors; note that the
        # transformed output is discarded here and only the fitted PCA
        # object is returned for later use
        pca = PCA(whiten=True)
        pca.fit_transform(X)

    else:
        pca = None

    # Scale data to 0 mean and unit variance
    if params["scale"]:

        scaler = StandardScaler()

        # As above, only the fitted scaler is returned; X itself is left unscaled
        scaler.fit_transform(X)
    else:
        scaler = None

    return X, pca, scaler
Developer ID: gdsa-upc, Project: gdsa-suport, Lines of code: 59, Source file: get_features.py

Example 15: main

# Module to import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def main():

    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
            header = None,
            sep = '\s+')
    df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
            'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
            'LSTAT', 'MEDV']
    print(df.head())

    # Select a subset of the features and plot the correlation between features
    cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']
    sns.pairplot(df[cols], size=2.5);
    plt.title('Correlations between 5 features')
    plt.show()

    # Plot a heatmap of the same subset of features
    cm = np.corrcoef(df[cols].values.T)
    sns.set(font_scale=2.5)
    hm = sns.heatmap(cm,
            cbar = True,
            annot = True,
            square = True,
            fmt = '.2f',
            annot_kws = {'size': 15},
            yticklabels = cols,
            xticklabels = cols)
    plt.show()

    X = df[['RM']].values
    y = df['MEDV'].values

    sc_x = StandardScaler()
    sc_y = StandardScaler()

    X_std = sc_x.fit_transform(X)
    y_std = sc_y.fit_transform(y[:, np.newaxis]).flatten()  # StandardScaler expects a 2D array
    
    lr = LinearRegressionGD()
    lr.fit(X_std, y_std)

    plt.plot(range(1, lr.n_iter + 1), lr.cost_)
    plt.ylabel('SSE')
    plt.xlabel('Epoch')
    plt.show()

    lin_regplot(X_std, y_std, lr)
    plt.xlabel('Average number of rooms [RM] (standardized)')
    plt.ylabel('Price in $1000\'s [MEDV] (standardized)')
    plt.show()
    
    # Example prediction for a house with 5 rooms
    num_rooms_std = sc_x.transform(np.array([[5.0]]))  # transform expects a 2D array, hence [[5.0]]
    price_std = lr.predict(num_rooms_std)
    print("Price in $1000's: %.3f" % \
            sc_y.inverse_transform(price_std))
Developer ID: southpaw94, Project: MachineLearning, Lines of code: 58, Source file: housing.py
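Several of the examples above (4 and 15 in particular) scale a one-dimensional target vector. StandardScaler, like other scikit-learn transformers, expects a 2D array of shape (n_samples, n_features), and recent versions raise an error for 1D input, so a 1D target has to be reshaped before fit_transform and flattened afterwards. A short sketch with made-up values:

import numpy as np
from sklearn.preprocessing import StandardScaler

y = np.array([24.0, 21.6, 34.7, 33.4])  # 1D target vector
sc_y = StandardScaler()
y_std = sc_y.fit_transform(y[:, np.newaxis]).flatten()           # reshape to (n, 1), scale, back to 1D
y_orig = sc_y.inverse_transform(y_std[:, np.newaxis]).flatten()  # recover the original units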


注:本文中的sklearn.preprocessing.StandardScaler.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。