

Python StandardScaler.fit_transform Method Code Examples

This article collects typical usage examples of the sklearn.preprocessing.StandardScaler.fit_transform method in Python. If you are wondering how StandardScaler.fit_transform works, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples of the containing class, sklearn.preprocessing.StandardScaler.


The following 15 code examples of StandardScaler.fit_transform are sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
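Before the project-sourced examples, a minimal self-contained sketch of the usual pattern may be helpful (the array values below are made up for illustration and do not come from any of the projects): fit_transform learns the per-feature mean and standard deviation from the training split and standardizes it in one step, while transform reuses those statistics on the test split.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Toy data: two features on very different scales (illustrative values only)
X_train = np.array([[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]])
X_test = np.array([[1.5, 250.0]])

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)  # fit on the training data, then transform it
X_test_std = scaler.transform(X_test)        # reuse the training mean/std on the test data

print(X_train_std.mean(axis=0))  # each column is now centered near 0
print(X_test_std)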

Example 1: logisticRegression

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def logisticRegression():
    data = loadtxtAndcsv_data("data1.txt", ",", np.float64)
    X = data[:,0:-1]
    y = data[:,-1]

    # Split into training and test sets
    x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

    # Standardize the features
    scaler = StandardScaler()
    # scaler.fit(x_train)
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)    # apply the statistics learned from the training set

    # Logistic regression
    model = LogisticRegression()
    model.fit(x_train,y_train)

    # Predict
    predict = model.predict(x_test)
    right = sum(predict == y_test)

    predict = np.hstack((predict.reshape(-1,1),y_test.reshape(-1,1)))   # put predictions and true labels side by side for easy inspection
    print(predict)
    print('Test set accuracy: %f%%' % (right*100.0/predict.shape[0]))    # accuracy on the test set
Author: suyuann, Project: MachineLearning_Python, Lines: 27, Source: LogisticRegression_scikit-learn.py

Example 2: test_same_fit_transform

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
    def test_same_fit_transform(self):
        X, X_rdd = self.make_dense_rdd()

        local = StandardScaler()
        dist = SparkStandardScaler()

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)

        local = StandardScaler(with_mean=False)
        dist = SparkStandardScaler(with_mean=False)

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)

        local = StandardScaler(with_std=False)
        dist = SparkStandardScaler(with_std=False)

        X_trans = local.fit_transform(X)
        X_rdd_trans = dist.fit_transform(X_rdd).toarray()
        X_converted = dist.to_scikit().transform(X)

        assert_array_almost_equal(X_trans, X_rdd_trans)
        assert_array_almost_equal(X_trans, X_converted)
Author: lensacom, Project: sparkit-learn, Lines: 34, Source: test_data.py

Example 3: _transform_data

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def _transform_data():
    from solaris.run import load_data
    from solaris.models import LocalModel

    data = load_data()
    X = data['X_train']
    y = data['y_train']

    # no shuffle - past-future split
    offset = int(X.shape[0] * 0.5)
    X_train, y_train = X[:offset], y[:offset]
    X_test, y_test = X[offset:], y[offset:]

    print('_' * 80)
    print('transforming data')
    print()
    tf = LocalModel(None)
    print('transforming train')
    X_train, y_train = tf.transform(X_train, y_train)
    print('transforming test')
    X_test, y_test = tf.transform(X_test, y_test)
    print('fin')

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    scaler = StandardScaler()
    y_train = scaler.fit_transform(y_train)
    y_test = scaler.transform(y_test)

    data = {'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test}
    joblib.dump(data, 'data/dbndata.pkl')
Author: mhdella, Project: kaggle-solar-energy, Lines: 36, Source: dbn_eval.py

Example 4: generate_dataset

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
    """Generate a regression dataset with the given parameters."""
    if verbose:
        print("generating dataset...")

    X, y, coef = make_regression(n_samples=n_train + n_test,
                                 n_features=n_features, noise=noise, coef=True)

    random_seed = 13
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=n_train, random_state=random_seed)
    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test = X_scaler.transform(X_test)

    y_scaler = StandardScaler()
    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
    y_test = y_scaler.transform(y_test[:, None])[:, 0]

    gc.collect()
    if verbose:
        print("ok")
    return X_train, y_train, X_test, y_test
Author: , Project: , Lines: 27, Source:

Example 5: remove_outliers

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def remove_outliers(image, mask):
    # Apply the mask to the image to check for the presence of a bee
    im = cv2.bitwise_and(image, image, mask=mask)
    ldp_image, _, _ = ldp.ldp(im)
    test_Y = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    test_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    test = np.concatenate((test_Y, test_rgb), axis=1)
    mask_not = cv2.bitwise_not(mask)
    ret1, mask_not = cv2.threshold(mask_not, np.mean(mask_not), 255, cv2.THRESH_BINARY)
    im = cv2.bitwise_and(image, image, mask=mask_not)
    ldp_image, _, _ = ldp.ldp(im)
    data_ldp = ldp_image.reshape((ldp_image.shape[0] * ldp_image.shape[1], ldp_image.shape[2]))
    data_rgb = im.reshape((im.shape[0] * im.shape[1], im.shape[2]))
    data = np.concatenate((data_rgb, data_ldp), axis=1)
    data = data[np.any(data != 0, axis=1)]
    print(data.shape)
    data = data.astype('float64')
    data = preprocessing.normalize(data, axis=0)
    ss = StandardScaler()
    data = ss.fit_transform(data)
    clf = svm.OneClassSVM(nu=0.8, kernel="rbf", gamma=0.1)
    clf.fit(data)
    test = test.astype('float64')
    test = preprocessing.normalize(test, axis=0)
    print(test.shape)
    test = ss.fit_transform(test)
    test = clf.predict(test)
    test = test.reshape((image.shape[0], image.shape[1]))
    test[test == -1] = 0
    test[test == 1] = 255
    test = test.astype('uint8')
    im = cv2.bitwise_and(image, image, mask=test)
    im = cv2.bitwise_and(im, im, mask=mask)
    # print(test[:, 0], test[:, 1])
    return (im, test)
Author: sai19, Project: Bee_image_classification, Lines: 37, Source: bee_preprocess.py

Example 6: DBScan_Flux

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def DBScan_Flux(phots, ycenters, xcenters, dbsClean=0, useTheForce=False):
    """Class methods are similar to regular functions.

    Note:
        Do not include the `self` parameter in the ``Args`` section.

    Args:
        param1: The first parameter.
        param2: The second parameter.

    Returns:
        True if successful, False otherwise.

    """
    
    dbsPhots    = DBSCAN()#n_jobs=-1)
    stdScaler   = StandardScaler()
    
    phots       = np.copy(phots.ravel())
    phots[~np.isfinite(phots)] = np.median(phots[np.isfinite(phots)])
    
    featuresNow = np.transpose([stdScaler.fit_transform(ycenters[:,None]).ravel(), \
                                stdScaler.fit_transform(xcenters[:,None]).ravel(), \
                                stdScaler.fit_transform(phots[:,None]).ravel()   ] )
    
    # print(featuresNow.shape)
    dbsPhotsPred= dbsPhots.fit_predict(featuresNow)
    
    return dbsPhotsPred == dbsClean
Author: exowanderer, Project: ExoplanetTSO, Lines: 31, Source: bak_auxiliary.py

Example 7: batchScaling

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def batchScaling(in_root="raw", out_root="data", with_mean=True, with_std=True):

    Xy_files = filter(lambda x:x.endswith(".Xy.npz"), os.listdir(in_root))
    # Xy_files = ["image_rgb_gist.Xy.npz"]

    for Xy_file in Xy_files:

        in_path = os.path.join( in_root, Xy_file )
        out_path = os.path.join( out_root, Xy_file )

        print('> load %s' % in_path)

        data = np.load( in_path )
        
        ## detect sparse or dense
        _sparse = True if len(data['X'].shape) == 0 else False

        print('> scaling')
        if _sparse:
            ## Cannot center sparse matrices: pass `with_mean=False` instead.
            print('>> Sparse matrix detected. Use with_mean=False')
            scaler = StandardScaler(with_mean=False, with_std=with_std)
            X = scaler.fit_transform( data['X'].all() )
        else:
            scaler = StandardScaler(with_mean=with_mean, with_std=with_std)
            X = scaler.fit_transform( data['X'] )

        
        print('> compressing and dumping to %s' % out_path)
        np.savez_compressed(out_path, X=X, y=data['y'])

        print('=' * 50)
Author: AcademiaSinicaNLPLab, Project: LJ40K, Lines: 34, Source: batchScaling.py

Example 8: TrainValidSplitter

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class TrainValidSplitter(object):
    def __init__(self, standardize=True, few=False):
        self.standardize = standardize
        self.few = few
        self.standa = None

    def __call__(self, X, y, net):
        strati = StratifiedShuffleSplit(y = y, n_iter = 1, test_size = 0.2, random_state = 1234)
        
        train_indices, valid_indices = next(iter(strati))
        
        if self.standardize:
            self.standa = StandardScaler()
            if self.few:
                X_train = np.hstack((self.standa.fit_transform(X[train_indices,:23]), X[train_indices,23:]))
                X_valid = np.hstack((self.standa.transform(X[valid_indices,:23]), X[valid_indices,23:]))
            else:
                X_train = self.standa.fit_transform(X[train_indices])
                X_valid = self.standa.transform(X[valid_indices])
        else:
            X_train, X_valid = X[train_indices], X[valid_indices]
        
        y_train, y_valid = y[train_indices], y[valid_indices]
        
        return X_train, X_valid, y_train, y_valid
Author: matchado, Project: WalmartTripType, Lines: 27, Source: train_and_predict.py

Example 9: LinearXGB

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
class LinearXGB(ClippedMixin):
    trained = set()
    cache = {}

    def __init__(self, params, num_rounds):
        self.params = params
        self.scaler = StandardScaler(with_mean=False)
        self.num_rounds = num_rounds

    def fit(self, dense, svd, sparse, y):
        X_train = np.hstack((dense, svd))
        #X_train = hstack((X_train, sparse))
        train_hash = hash(str(X_train))
        if train_hash not in self.trained:
            X_scaled = self.scaler.fit_transform(X_train)
            X_scaled = normalize(X_scaled)
            dtrain = xgb.DMatrix(X_scaled, label=y)
            watchlist = [(dtrain, 'train')]
            self.bst = xgb.train(self.params, dtrain, self.num_rounds)#, watchlist)
            self.trained.add(train_hash)

    def predict(self, dense, svd, sparse):
        X_test = np.hstack((dense, svd))
        #X_test = hstack((X_test, sparse))
        test_hash = hash(str(X_test))
        if test_hash not in self.cache:
            #X_scaled = X_test
            X_scaled = self.scaler.fit_transform(X_test)
            X_scaled = normalize(X_scaled)
            dtest = xgb.DMatrix(X_scaled)
            #dtest = xgb.DMatrix(X_test)
            y_pred = self.bst.predict(dtest)
            self.cache[test_hash] = y_pred
        return self.cache[test_hash]
Author: ephes, Project: kaggle_homedepot, Lines: 36, Source: models.py

Example 10: prep_X_y

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def prep_X_y(df, constant=False, split=True):
    cols_to_exclude = ['venue_state', 'venue_name', 'venue_country', 'venue_address', 'ticket_types', 'email_domain', 'description', 'previous_payouts', 'payee_name', 'org_name', 'org_desc', 'object_id', 'name', 'acct_type', 'country', 'listed', 'currency', 'payout_type', 'channels']

    if constant:
        df['const'] = 1

    X = df.drop(cols_to_exclude + ['fraud'], axis=1).values
    y = df['fraud'].values

    print('columns used:\n', df.drop(cols_to_exclude + ['fraud'], axis=1).columns)

    if split:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)  # reuse the statistics fitted on the training set

        X_smoted, y_smoted = smote(X_train, y_train, target=.5)
        return X_smoted, X_test, y_smoted, y_test
    else:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_smoted, y_smoted = smote(X, y, target=.5)
        return X_smoted, y_smoted
Author: a-knit, Project: fraud_detector, Lines: 27, Source: pair_2.py

Example 11: data_fr

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def data_fr(novel_num):
    #if csv_file(novel, novel_num) is True:
    nn = str(novel_num)
    df_novel = pd.read_csv('novel_'+nn+'list_1.csv', header=None)
    try: 
        df_novel['wrd_length'] = df_novel[0].apply(wrd_lengths)
        df_novel['total_char'] = [sum(l) for l in df_novel['wrd_length']]
        df_novel['syl_count'] = df_novel[0].apply(syl_count)
        df_novel['syl_sum'] = [sum(l) for l in df_novel['syl_count']]
        df_novel['sentiment'] = df_novel[0].apply(detect_sentiment)
        #create csv for word to syl to improve syl function
        d = {}
        for l in df_novel[0]:
            sent = TextBlob(l)
            for x in sent.words:
                w = CountSyllables(x)
                d[x] = w
        with open('novel_'+nn+'list_1_syl.csv', 'w', newline='') as f:
            writer = csv.writer(f)
            for row in d.items():
                writer.writerow(row) 
        #create cluster columns
        df_cluster = df_novel.drop('wrd_length', 1)
        df_cluster = df_cluster.drop('syl_count', 1)
        X = df_cluster.drop(0, axis = 1)
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=20, random_state=1)
        km.fit(X_scaled)
        df_cluster_20 = df_cluster.copy()
        df_cluster_20['cluster'] = km.labels_
        df_novel['cluster_20'] = df_cluster_20['cluster']
        #Create cluster 3
        df_cluster_3 = df_cluster.copy()
        X = df_cluster_3.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters = 3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3['cluster'] = km.labels_
        df_novel['cluster_3_syl'] = df_cluster_3['cluster']
        #create cluster 3 no syl
        df_cluster_3no_syl = df_cluster.copy()
        X = df_cluster_3no_syl.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=3, random_state=1)
        km.fit(X_scaled)
        df_cluster_3no_syl['cluster'] = km.labels_
        df_novel['cluster_3no_syl'] = df_cluster_3no_syl['cluster']
        #Create 5 clusters
        df_cluster_5 = df_cluster.copy()
        X = df_cluster_5.drop(0, axis=1)
        X_scaled = scaler.fit_transform(X)
        km = KMeans(n_clusters=5, random_state=1)
        km.fit(X_scaled)
        df_cluster_5['cluster'] = km.labels_
        df_novel['cluster_5'] = df_cluster_5['cluster']
        df_novel.to_csv('novel_'+nn+'list_1.csv', index=False)
    except:
        rejects_3.append(novel_num)
Author: ravenruckus, Project: novel_processing, Lines: 61, Source: novel_processing.py

Example 12: train_test

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
    def train_test(self, X, y, X_test):
        """
        """
        sss = StratifiedShuffleSplit(y, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0, X1 = X[train_id], X[valid_id]
            y0, y1 = y[train_id], y[valid_id]  
            
        #First half
        
        w0 = np.zeros(len(y0))
        for i in range(len(w0)):
            w0[i] = self.w[int(y0[i])]
        xg0_train = DMatrix(X0, label=y0, weight=w0)  
        xg0_test = DMatrix(X1, label=y1)   
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)
        
        #Second half
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))  
        w1 = np.zeros(len(y1))
        
#        self.param['eta'] = 0.01
        self.num_round = 450

        for i in range(len(w1)):
            w1[i] = self.w[int(y1[i])]
        xg1_train = DMatrix(X1, label=y1, weight=w1)    
        xg_test= DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
        
        return y_pred
Author: chrinide, Project: kaggle_otto_group, Lines: 51, Source: clf_xgboost_split.py

Example 13: correlation_matching

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def correlation_matching(I_tr, T_tr, I_te, T_te, n_comps):
    """ Learns correlation matching (CM) over I_tr and T_tr
        and applies it to I_tr, T_tr, I_te, T_te
        
        
        Parameters
        ----------
        
        I_tr: np.ndarray [shape=(n_tr, d_I)]
            image data matrix for training
        
        T_tr: np.ndarray [shape=(n_tr, d_T)]
            text data matrix for training
        
        I_te: np.ndarray [shape=(n_te, d_I)]
            image data matrix for testing
        
        T_te: np.ndarray [shape=(n_te, d_T)]
            text data matrix for testing
        
        n_comps: int > 0 [scalar]
            number of canonical components to use
            
        Returns
        -------
        
        I_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            image data matrix represented in correlation space
        
        T_tr_cca : np.ndarray [shape=(n_tr, n_comps)]
            text data matrix represented in correlation space
        
        I_te_cca : np.ndarray [shape=(n_te, n_comps)]
            image data matrix represented in correlation space
        
        T_te_cca : np.ndarray [shape=(n_te, n_comps)]
            text data matrix represented in correlation space
        
        """


    # scale the image and text data
    I_scaler = StandardScaler()
    I_tr = I_scaler.fit_transform(I_tr)
    I_te = I_scaler.transform(I_te)

    T_scaler = StandardScaler()
    T_tr = T_scaler.fit_transform(T_tr)
    T_te = T_scaler.transform(T_te)

    cca = PLSCanonical(n_components=n_comps, scale=False)
    cca.fit(I_tr, T_tr)

    I_tr_cca, T_tr_cca = cca.transform(I_tr, T_tr)
    I_te_cca, T_te_cca = cca.transform(I_te, T_te)

    return I_tr_cca, T_tr_cca, I_te_cca, T_te_cca
Author: emanuetre, Project: crossmodal, Lines: 59, Source: correlation_matching.py

Example 14: stack_features

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def stack_features(params):

    """
    Get local features for all training images together
    """

    # Init detector and extractor
    detector, extractor = init_detect_extract(params)

    # Read image names
    with open(
        os.path.join(params["root"], params["root_save"], params["image_lists"], params["split"] + ".txt"), "r"
    ) as f:
        image_list = f.readlines()

    X = []
    for image_name in image_list:

        # Read image
        im = cv2.imread(
            os.path.join(params["root"], params["database"], params["split"], "images", image_name.rstrip())
        )

        # Resize image
        im = resize_image(params, im)

        feats = image_local_features(im, detector, extractor)
        # Stack all local descriptors together

        if feats is not None:
            if len(X) == 0:

                X = feats
            else:
                X = np.vstack((X, feats))

    if params["normalize_feats"]:
        X = normalize(X)

    if params["whiten"]:

        pca = PCA(whiten=True)
        X = pca.fit_transform(X)  # keep the whitened descriptors

    else:
        pca = None

    # Scale data to 0 mean and unit variance
    if params["scale"]:

        scaler = StandardScaler()

        X = scaler.fit_transform(X)  # keep the standardized descriptors
    else:
        scaler = None

    return X, pca, scaler
Author: gdsa-upc, Project: gdsa-suport, Lines: 59, Source: get_features.py

Example 15: main

# Required import: from sklearn.preprocessing import StandardScaler [as alias]
# Or: from sklearn.preprocessing.StandardScaler import fit_transform [as alias]
def main():

    df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
            header = None,
            sep = r'\s+')
    df.columns = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
            'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
            'LSTAT', 'MEDV']
    print(df.head())

    # Select a subset of the features and plot the correlation between features
    cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']
    sns.pairplot(df[cols], size=2.5);
    plt.title('Correlations between 5 features')
    plt.show()

    # Plot a heatmap of the same subset of features
    cm = np.corrcoef(df[cols].values.T)
    sns.set(font_scale=2.5)
    hm = sns.heatmap(cm,
            cbar = True,
            annot = True,
            square = True,
            fmt = '.2f',
            annot_kws = {'size': 15},
            yticklabels = cols,
            xticklabels = cols)
    plt.show()

    X = df[['RM']].values
    y = df['MEDV'].values

    sc_x = StandardScaler()
    sc_y = StandardScaler()

    X_std = sc_x.fit_transform(X)
    y_std = sc_y.fit_transform(y[:, np.newaxis]).flatten()
    
    lr = LinearRegressionGD()
    lr.fit(X_std, y_std)

    plt.plot(range(1, lr.n_iter + 1), lr.cost_)
    plt.ylabel('SSE')
    plt.xlabel('Epoch')
    plt.show()

    lin_regplot(X_std, y_std, lr)
    plt.xlabel('Average number of rooms [RM] (standardized)')
    plt.ylabel('Price in $1000\'s [MEDV] (standardized)')
    plt.show()
    
    # Example prediction for a house with 5 rooms
    num_rooms_std = sc_x.transform([[5.0]])
    price_std = lr.predict(num_rooms_std)
    print("Price in $1000's: %.3f" % \
            sc_y.inverse_transform(price_std))
Author: southpaw94, Project: MachineLearning, Lines: 58, Source: housing.py
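
As a follow-up to Example 15, here is a minimal, made-up illustration (not taken from the housing.py project) of the round trip between fit_transform and inverse_transform: the target is standardized as a column vector, and a prediction in standardized units is mapped back to the original scale.

import numpy as np
from sklearn.preprocessing import StandardScaler

# Illustrative values only
y = np.array([[10.0], [20.0], [30.0]])     # column vector, as the scaler expects 2-D input
sc_y = StandardScaler()
y_std = sc_y.fit_transform(y)              # standardized targets

pred_std = np.array([[0.5]])               # a prediction in standardized units
pred = sc_y.inverse_transform(pred_std)    # back to the original scale
print(pred)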


Note: The sklearn.preprocessing.StandardScaler.fit_transform method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective authors, who retain copyright; consult each project's license before distributing or reusing the code, and do not republish without permission.