

Python Normalizer.transform Method Code Examples

This article collects and summarizes typical usage examples of the sklearn.preprocessing.Normalizer.transform method in Python. If you have been wondering what Normalizer.transform does, how to call it, or how it is used in practice, the curated examples below should help. You can also explore the other usage examples of sklearn.preprocessing.Normalizer, the class this method belongs to.


The following sections present 15 code examples of Normalizer.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
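Before working through the collected examples, here is a minimal, self-contained sketch of the fit/transform pattern they all follow (the data values below are made up for illustration):

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[4.0, 1.0, 2.0, 2.0],
              [1.0, 3.0, 9.0, 3.0],
              [5.0, 7.0, 5.0, 1.0]])

# Normalizer is stateless: fit() only validates its input, while
# transform() rescales each row (each sample) to unit norm
normalizer = Normalizer(norm='l2')
normalizer.fit(X)
X_norm = normalizer.transform(X)

print(np.linalg.norm(X_norm, axis=1))  # -> [1. 1. 1.]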

Example 1: TfIdf

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: from itertools import chain;
# from sklearn.feature_extraction.text import TfidfVectorizer;
# from sklearn.decomposition import TruncatedSVD;
# from sklearn.feature_selection import SelectKBest, f_classif;
# `Feature` is a base class defined elsewhere in the project.
class TfIdf(Feature):
    def __init__(self):
        self.kbest = None
        self.vect = None
        self.truncated = None
        self.normalizer = None

    def train(self, reviews, labels):
        self.vect = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')

        reviews_text = [' '.join(list(chain.from_iterable(review))) for review in reviews]
        tfidf_matrix = self.vect.fit_transform(reviews_text).toarray()

        self.truncated = TruncatedSVD(n_components=50)
        self.truncated.fit(tfidf_matrix, labels)

        trunc = self.truncated.transform(tfidf_matrix)
        self.normalizer = Normalizer()
        self.normalizer.fit(trunc)

        self.kbest = SelectKBest(f_classif, k=5)
        self.kbest.fit(self.normalizer.transform(trunc), labels)

    def score(self, data):
        reviews_text = ' '.join(list(chain.from_iterable(data)))
        tfidf_matrix = self.vect.transform([reviews_text]).toarray()

        trunc = self.truncated.transform(tfidf_matrix)

        return tuple(self.kbest.transform(self.normalizer.transform(trunc))[0, :])
Author: EdwardBetts | Project: Yulp | Lines: 32 | Source: tfidf.py

Example 2: __init__

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np; import joblib
# (or sklearn.externals.joblib in older sklearn versions).
    def __init__(self, 
                 YTrain_file,
                 XTrain_file,
                 XTest_file,
                 output_path,
                 normalise,
                 C,
                 class_weight,
                 ):
        """
        Arguments:
      
        """
        self.YTrain = joblib.load(YTrain_file)
        XTrain = joblib.load(XTrain_file)
        self.XTrain = XTrain.reshape(np.size(XTrain, axis=0), -1)
       
        XTest = joblib.load(XTest_file)   
        self.XTest = XTest.reshape(np.size(XTest, axis=0), -1)

        self.output_path = output_path
    
        if normalise:
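            # copy=False lets transform() normalize the dense float arrays
            # in place, so the returned values can be ignored here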
            normalizer = Normalizer(copy=False)
            normalizer.transform(self.XTrain)
            normalizer.transform(self.XTest)

        self.C = C
        if class_weight == 'none':
            class_weight = None
        self.class_weight = class_weight
Author: dchall88 | Project: DIGITS | Lines: 33 | Source: svm_train_test.py

Example 3: KNN

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy; from sklearn import neighbors;
# `Model` (with its evaluate() method) is a base class from the project.
class KNN(Model):

    def __init__(self, X_train, y_train, X_val, y_val):
        super().__init__()
        self.normalizer = Normalizer()
        self.normalizer.fit(X_train)
        self.clf = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance', p=1)
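        # targets are log-transformed for fitting; guess() inverts this with exp()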
        self.clf.fit(self.normalizer.transform(X_train), numpy.log(y_train))
        print("Result on validation data: ", self.evaluate(self.normalizer.transform(X_val), y_val))

    def guess(self, feature):
        return numpy.exp(self.clf.predict(self.normalizer.transform(feature)))
Author: codeaudit | Project: entity-embedding-rossmann | Lines: 14 | Source: models.py

Example 4: load_data

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import os;
# from sklearn.datasets import load_svmlight_file;
# self.feature_extraction is defined elsewhere in the same class.
    def load_data(self):
        if not os.path.exists('features_train.txt'):
            self.feature_extraction('train.txt', 'features_train.txt')
        data_train, target_train = load_svmlight_file('features_train.txt')

        if not os.path.exists('features_test.txt'):
            self.feature_extraction('test.txt', 'features_test.txt')
        data_test, target_test = load_svmlight_file('features_test.txt')

        normalizer = Normalizer().fit(data_train)
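        # Normalizer is stateless, so fitting on the training split only
        # validates input; the same row-wise scaling applies to both splits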
        data_train = normalizer.transform(data_train)
        data_test = normalizer.transform(data_test)

        return data_train.toarray(), target_train, data_test.toarray(), target_test
Author: Max-Zhu | Project: Trec | Lines: 16 | Source: point_rank.py

Example 5: kfold

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np;
# from sklearn.feature_extraction.text import TfidfVectorizer;
# from sklearn.decomposition import TruncatedSVD;
# from sklearn.utils import shuffle; from sklearn.metrics import accuracy_score;
# `tokenize` is a user-defined tokenizer function.
def kfold(agetext, k, model, nfeatures, check=False, k2=None, max_df=0.9, min_df=3):
    out = []
    for i in range(k):
        print("iteration: " + str(i))
        agetext = shuffle(agetext)
        X = agetext["text"]
        X = X.tolist()
        label = agetext["agegroup"].tolist()
        vec = TfidfVectorizer(tokenizer=tokenize, token_pattern=r'(?u)\b\w\w+\b|^[_\W]+$',
                              lowercase=False, max_features=nfeatures, max_df=max_df,
                              min_df=min_df, use_idf=True, ngram_range=(1, 2))
        docs = []
        for doc in X:
            docs.append(" ".join(doc))
        docs2 = [doc.replace("\t", "").replace("\n", "") for doc in docs]
        # note: these hard-coded slices skip document 7999 and stop at 9499
        traindocs = docs2[:7999]
        X = vec.fit_transform(traindocs)
        testdocs = docs2[8000:9500]
        X_test = vec.transform(testdocs)
        tlabel = label[:7999]
        testl = label[8000:9500]
        if check:
            lsa = TruncatedSVD(k2, algorithm='arpack')
            normalizer = Normalizer(copy=False)
            X = lsa.fit_transform(X)
            X = normalizer.fit_transform(X)
            X_test = lsa.transform(X_test)
            X_test = normalizer.transform(X_test)
        model.fit(X, tlabel)
        pred = model.predict(X_test)
        out.append(round(accuracy_score(testl, pred), 2))
    print(out)
    print(np.mean(out))
Author: hurelyyu | Project: CS_Master_UW | Lines: 33 | Source: TMClassCopy.py

Example 6: kfold

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np;
# from sklearn.decomposition import TruncatedSVD;
# from sklearn import cross_validation (pre-0.18 sklearn);
# from sklearn.utils import shuffle; from sklearn.metrics import accuracy_score.
def kfold(agetext, k, model, k2):
    import collections
    out = []
    for i in range(k):
        print("iteration: " + str(i))
        agetext = shuffle(agetext)
        datatb = agetext.iloc[:, 1:]
        label = agetext["agegroup"].tolist()
        # cross_validation is the pre-0.18 sklearn module; newer versions
        # provide train_test_split in sklearn.model_selection
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            datatb, label, test_size=0.15, random_state=i * 6)
        data = X_train.values
        counter = collections.Counter(y_train)
        print(counter)
        testdata = X_test.values
        lsa = TruncatedSVD(k2, algorithm='arpack')
        normalizer = Normalizer(copy=False)
        X = lsa.fit_transform(data)
        X = normalizer.fit_transform(X)
        X_test = lsa.transform(testdata)
        X_test = normalizer.transform(X_test)
        model.fit(X, y_train)
        pred = model.predict(X_test)
        counter = collections.Counter(y_test)
        print(counter)
        counter = collections.Counter(pred)
        print(counter)
        out.append(round(accuracy_score(y_test, pred), 5))
    print(out)
    print(np.mean(out))
Author: hurelyyu | Project: CS_Master_UW | Lines: 31 | Source: AgeGroup.py

Example 7: test_normalizer_l1

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np; import scipy.sparse as sp;
# from scipy import linalg as la;
# from numpy.testing import assert_almost_equal;
# `toarray` is a test helper that densifies sparse or dense input.
def test_normalizer_l1():
    rng = np.random.RandomState(0)
    X_dense = rng.randn(4, 5)
    X_sparse_unpruned = sp.csr_matrix(X_dense)

    # set the row number 3 to zero
    X_dense[3, :] = 0.0

    # set the row number 3 to zero without pruning (can happen in real life)
    indptr_3 = X_sparse_unpruned.indptr[3]
    indptr_4 = X_sparse_unpruned.indptr[4]
    X_sparse_unpruned.data[indptr_3:indptr_4] = 0.0

    # build the pruned variant using the regular constructor
    X_sparse_pruned = sp.csr_matrix(X_dense)

    # check inputs that support the no-copy optim
    for X in (X_dense, X_sparse_pruned, X_sparse_unpruned):

        normalizer = Normalizer(norm='l1', copy=True)
        X_norm = normalizer.transform(X)
        assert X_norm is not X
        X_norm1 = toarray(X_norm)

        normalizer = Normalizer(norm='l1', copy=False)
        X_norm = normalizer.transform(X)
        assert X_norm is X
        X_norm2 = toarray(X_norm)

        for X_norm in (X_norm1, X_norm2):
            row_sums = np.abs(X_norm).sum(axis=1)
            for i in range(3):
                assert_almost_equal(row_sums[i], 1.0)
            assert_almost_equal(row_sums[3], 0.0)

    # check input for which copy=False won't prevent a copy
    for init in (sp.coo_matrix, sp.csc_matrix, sp.lil_matrix):
        X = init(X_dense)
        # non-CSR sparse input gets converted to CSR, so copy=False
        # cannot avoid a copy here
        X_norm = Normalizer(norm='l2', copy=False).transform(X)

        assert X_norm is not X
        assert isinstance(X_norm, sp.csr_matrix)

        X_norm = toarray(X_norm)
        for i in range(3):
            # the original asserted against the stale `row_sums` from the
            # earlier loop; with l2 normalization, check each row's norm
            assert_almost_equal(la.norm(X_norm[i]), 1.0)
        assert_almost_equal(la.norm(X_norm[3]), 0.0)
Author: Yangqing | Project: scikit-learn | Lines: 49 | Source: test_preprocessing.py

Example 8: _normalize

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
    def _normalize(self, X, y, X_t):
        from sklearn.preprocessing import Normalizer
        NORM = Normalizer()
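        # Normalizer is stateless, so fit_transform on X and the plain
        # transform on X_t below apply exactly the same row-wise scaling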

        X = NORM.fit_transform(X, y)
        X_t = NORM.transform(X_t)

        return X, X_t
Author: mikbuch | Project: pymri | Lines: 10 | Source: datasets.py

Example 9: readAndPreProcess

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import pandas as pd; import numpy as np;
# import matplotlib.pyplot as plt;
# from sklearn.preprocessing import MinMaxScaler, StandardScaler.
def readAndPreProcess():
    print("\n\n********** CS-412 HW5 Mini Project **********")
    print("************ Submitted by Sankul ************\n\n")
    print("Reading data, please ensure that the dataset is in the same folder.")
    resp = pd.read_csv('responses.csv')
    print("Data reading complete!")
    print("Some stats regarding the data:")
    print(resp.describe())

    print("\nStarting pre-processing.....")

    print("\nVisualizing missing values per column:")
    emptyVals = resp.isnull().sum().sort_values(ascending=False)
    emptyVals.plot(kind='barh', figsize=(20, 35))
    plt.show()

    print("\nChecking for NaN and infinite values in the target column (Empathy):")
    if len(resp['Empathy']) - len(resp[np.isfinite(resp['Empathy'])]):
        print("Number of infinite or NaN values in Empathy column: ",
              len(resp['Empathy']) - len(resp[np.isfinite(resp['Empathy'])]))
        print("Removing them")
        resp = resp[np.isfinite(resp['Empathy'])]
        print("Infinite and NaN values removed")

    print("\nChecking for categorical features:")
    # the original tested pd.Categorical(resp).dtype.name, which does not
    # work on a whole DataFrame; checking for object-dtype columns instead
    if resp.select_dtypes(include=[object]).shape[1] > 0:
        print("Categorical features found. Removing them...")
        resp = resp.select_dtypes(exclude=[object])
        print("Categorical features removed")

    print("\nReplacing remaining NaN values with the column mean:")
    resp = resp.fillna(resp.mean())
    print("Values replaced")

    print("\nSeparating labels from data:")
    Y = resp['Empathy'].values
    X = resp.drop('Empathy', axis=1)
    print("Labels separated")

    print("\nScaling, standardizing and normalizing the data:")
    scaler = MinMaxScaler(feature_range=(0, 1))
    rescaledX = scaler.fit_transform(X)

    scaler = StandardScaler().fit(rescaledX)
    standardizedX = scaler.transform(rescaledX)

    normalizer = Normalizer().fit(standardizedX)
    normalizedX = normalizer.transform(standardizedX)
    print("Scaling, standardizing and normalizing completed")

    print("\nFinal data shape:")
    print(normalizedX.shape)
    print("First row of the final data:")
    print(normalizedX[0])

    return normalizedX, Y
Author: dark-shade | Project: CS-412-IML-HW5-Mini-Project | Lines: 60 | Source: hw5.py

Example 10: ScikitNormalizer

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
class ScikitNormalizer(object):
    def __init__(self):
        self.data_normalizer = Normalizer()

    def fit(self, data):
        self.data_normalizer.fit(data)

    def transform(self, data):
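        # components of a unit-norm vector lie in [-1, 1]; this affine
        # shift maps the normalized output into [0, 1]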
        return (self.data_normalizer.transform(data) + 1) / 2
Author: Falgunithakor | Project: SummerResearchDE-BPSO | Lines: 11 | Source: Normalizer.py

Example 11: test_ver2_syntetic_dataset

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: `experiment`, `load_sparse_data` and the `PKL`
# path constant are modules/helpers from the surrounding project.
    def test_ver2_syntetic_dataset(self):

        self.ex = experiment.Experiment()
        self.ex.cf_matrix = load_sparse_data('syntetic_cf.dat')
        n = Normalizer(norm='l2', copy=True)
        self.ex.cf_matrix = n.transform(self.ex.cf_matrix)  # rows now l2-normalized
        self.ex.cb_prox = experiment.Experiment.load_data(PKL + 'cb_prox.pkl')
        # with unit-norm rows, this product yields cosine similarities
        self.ex.cf_prox = self.ex.cf_matrix * self.ex.cf_matrix.T
        self.ex.test_corr_sparsity(draw=True, interval=100)
Author: osmanbaskaya | Project: acm_mak | Lines: 11 | Source: unittest_experiment.py

Example 12: make_nn_regression

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np;
# from sklearn.cross_validation import ShuffleSplit (pre-0.18 sklearn);
# from sklearn.preprocessing import MinMaxScaler;
# from sklearn.utils import check_random_state;
# `_make_nn_regression` is a private helper in the same module.
def make_nn_regression(n_samples=100, n_features=100, n_informative=10,
                       dense=False, noise=0.0, test_size=0,
                       normalize_x=True, normalize_y=True,
                       shuffle=True, random_state=None):

    X, y, w = _make_nn_regression(n_samples=n_samples,
                                  n_features=n_features,
                                  n_informative=n_informative,
                                  shuffle=shuffle,
                                  random_state=random_state)

    if dense:
        X = X.toarray()

    if test_size > 0:
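        # pre-0.18 sklearn ShuffleSplit API: the first argument is the number
        # of samples and n_iter selects the number of re-shuffles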
        cv = ShuffleSplit(len(y), n_iter=1, random_state=random_state,
                          test_size=test_size, train_size=1-test_size)

        train, test = list(cv)[0]
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]
        if not dense:
            X_train.sort_indices()
            X_test.sort_indices()
    else:
        X_train, y_train = X, y
        if not dense:
            X_train.sort_indices()
        X_test, y_test = None, None

    # Add noise
    if noise > 0.0:
        generator = check_random_state(random_state)
        y_train += generator.normal(scale=noise * np.std(y_train),
                                    size=y_train.shape)
        y_train = np.maximum(y_train, 0)

    if normalize_x:
        normalizer = Normalizer()
        X_train = normalizer.fit_transform(X_train)
        if X_test is not None:
            X_test = normalizer.transform(X_test)

    if normalize_y:
        scaler = MinMaxScaler()
        y_train = scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
        if y_test is not None:
            y_test = scaler.transform(y_test.reshape(-1, 1)).ravel()

    if X_test is not None:
        return X_train, y_train, X_test, y_test, w
    else:
        return X_train, y_train, w
Author: RPGOne | Project: sebabulba | Lines: 55 | Source: samples_generator.py

Example 13: normalize

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np;
# `collapse_image` and the `msi` image type come from the surrounding package.
    def normalize(self, msi, norm="l1"):
        original_shape = msi.get_image().shape
        collapsed_image = collapse_image(msi.get_image())
        # temporarily save the mask, since the sklearn normalizer strips masks
        is_masked_array = isinstance(msi.get_image(), np.ma.MaskedArray)
        if is_masked_array:
            mask = msi.get_image().mask
        normalizer = Normalizer(norm=norm)
        normalized_image = normalizer.transform(collapsed_image)
        if is_masked_array:
            normalized_image = np.ma.MaskedArray(normalized_image, mask=mask)
        msi.set_image(np.reshape(normalized_image, original_shape))
Author: 151706061 | Project: MITK | Lines: 14 | Source: normalize.py

Example 14: test_normalizer_vs_sklearn

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: import numpy as np;
# NormalizerR is sklearn.preprocessing.Normalizer imported under another name,
# Normalizer here is msmbuilder.preprocessing.Normalizer, and `trajs` is a
# module-level list of trajectory arrays.
def test_normalizer_vs_sklearn():
    # Compare msmbuilder.preprocessing.Normalizer
    # with sklearn.preprocessing.Normalizer

    normalizerr = NormalizerR()
    normalizerr.fit(np.concatenate(trajs))

    normalizer = Normalizer()
    normalizer.fit(trajs)

    y_ref1 = normalizerr.transform(trajs[0])
    y1 = normalizer.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Author: Eigenstate | Project: msmbuilder | Lines: 16 | Source: test_preprocessing.py

Example 15: SiftBOW

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import transform [as alias]
# This excerpt also assumes: `sift_descriptors`, `color_sift_descriptors`,
# `bag_of_words`, `gt_in_sp` and `DataBunch` are helpers from the project.
class SiftBOW(object):
    def __init__(self, dataset, n_words=300, add_global_desc=True,
                 color_sift=False):
        self.dataset = dataset
        self.n_words = n_words
        self.add_global_desc = add_global_desc
        self.normalizer = Normalizer(norm='l1')
        self.color_sift = color_sift
        if self.color_sift:
            self.feature_extractor = color_sift_descriptors
        else:
            self.feature_extractor = sift_descriptors

    def fit_transform(self, image_names, superpixels):
        descriptors, coordinates = self.feature_extractor(image_names,
                                                          self.dataset)
        print("end sift descriptors")
        vq, X = bag_of_words(descriptors, superpixels, coordinates)
        X = [self.normalizer.transform(x) for x in X]

        self.vq_ = vq
        Y = [gt_in_sp(self.dataset, f, sp) for f, sp in zip(image_names,
                                                            superpixels)]
        return DataBunch(X, Y, image_names, superpixels)

    def fit(self, image_names, spixel):
        # the original called self.fit_predict, which this class does not
        # define; fit_transform appears to be the intended method
        self.fit_transform(image_names, spixel)
        return self

    def transform(self, image_names, superpixels):
        descriptors, coordinates = self.feature_extractor(image_names,
                                                          self.dataset)
        _, X = bag_of_words(descriptors, superpixels, coordinates, vq=self.vq_)
        Y = [gt_in_sp(self.dataset, f, sp) for f, sp in zip(image_names,
                                                            superpixels)]
        X = [self.normalizer.transform(x) for x in X]
        return DataBunch(X, Y, image_names, superpixels)
Author: amueller | Project: segmentation | Lines: 39 | Source: bow.py


Note: The sklearn.preprocessing.Normalizer.transform examples above were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by many developers; copyright remains with the original authors, and any distribution or reuse should follow the corresponding project's license. Please do not repost without permission.