

Python Normalizer.fit_transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.Normalizer.fit_transform. If you have been wondering what Normalizer.fit_transform does, how to call it, or where to find working examples, the curated code samples below should help. You can also browse further usage examples for the containing class, sklearn.preprocessing.Normalizer.


The following shows 15 code examples of the Normalizer.fit_transform method, sorted by popularity by default.
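Before the examples, a quick orientation (not drawn from the examples themselves): Normalizer.fit_transform rescales each sample (each row) to unit norm, L2 by default, independently of every other row. A minimal sketch with illustrative data:

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[3.0, 4.0],
              [1.0, 1.0]])
X_norm = Normalizer(norm='l2').fit_transform(X)  # 'l2' is the default norm
print(X_norm)                          # [[0.6, 0.8], [0.7071..., 0.7071...]]
print(np.linalg.norm(X_norm, axis=1))  # every row now has norm 1.0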

Example 1: explore_k

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def explore_k(svd_trans, k_range):
    '''
    Explores various values of k in KMeans

    Args:
        svd_trans: dense array with lsi transformed data
        k_range: the range of k-values to explore
    Returns:
        scores: list of inertia scores for each k value
    '''

    scores = []
    # spherical kmeans, so normalize
    normalizer = Normalizer()
    norm_data = normalizer.fit_transform(svd_trans)
    for k in k_range:
        km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=1,
                    verbose=2)
        km.fit(norm_data)
        scores.append(-1*km.score(norm_data))
    plt.plot(k_range, scores)
    plt.xlabel('# of clusters')
    plt.ylabel('Inertia')
    sns.despine(offset=5, trim=True)
    return scores
Author: lwoloszy, Project: albumpitch, Lines: 27, Source: genres.py
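The "spherical kmeans, so normalize" comment in Example 1 rests on a standard identity: for unit vectors, ||a - b||^2 = 2 - 2*(a . b), so squared Euclidean distance is a monotone function of cosine similarity, and plain KMeans on L2-normalized data approximates spherical k-means. A quick check with illustrative random data:

import numpy as np
from sklearn.preprocessing import Normalizer

rng = np.random.RandomState(0)
a, b = Normalizer().fit_transform(rng.rand(2, 5))
# ||a - b||^2 == 2 - 2 * cos(a, b) when both rows are unit vectors
print(np.allclose(np.sum((a - b) ** 2), 2 - 2 * np.dot(a, b)))  # True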

Example 2: kfold

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def kfold(agetext,k,model,k2):
    import collections
    out = []
    for i in range(k):
        print "iteration: "+str(i)
        agetext = shuffle(agetext)
        datatb = agetext.iloc[:,1:]
        label = agetext["agegroup"].tolist()
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            datatb, label, test_size=0.15, random_state=i*6)
        data = X_train.values
        counter = collections.Counter(y_train)
        print(counter)
        testdata = X_test.values
        lsa = TruncatedSVD(k2, algorithm = 'arpack')
        normalizer = Normalizer(copy=False)
        X = lsa.fit_transform(data)
        X = normalizer.fit_transform(X)
        X_test = lsa.transform(testdata)
        X_test = normalizer.transform(X_test)
        model.fit(X,y_train)
        pred = model.predict(X_test)
        counter = collections.Counter(y_test)
        print(counter)
        counter = collections.Counter(pred)
        print(counter)
        out.append(round(accuracy_score(y_test, pred),5))
    print(out)
    print(np.mean(out))
Author: hurelyyu, Project: CS_Master_UW, Lines: 31, Source: AgeGroup.py

Example 3: getPcaFeatures

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
    def getPcaFeatures(self, images, components, image_size):
        imageDataset = self.getImagesAsDataset(images, image_size)
        norm = Normalizer()
        imageDataset = norm.fit_transform(imageDataset)
        pca = PCA(n_components=components)
        imageDataset = pca.fit_transform(imageDataset)
        return pca, norm, imageDataset
Author: tincho4t, Project: aaTP, Lines: 9, Source: ImagesProcessor.py

Example 4: kfold

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def kfold(agetext,k,model,nfeatures,check=False,k2 = None,max_df=0.9,min_df=3):
    out = []
    for i in range(k):
        print "iteration: "+str(i)
        agetext = shuffle(agetext)
        X = agetext["text"]
        X = X.tolist()
        label = agetext["agegroup"].tolist()
        vec = TfidfVectorizer(tokenizer=tokenize, token_pattern=r'(?u)\b\w\w+\b|^[_\W]+$',
                              lowercase=False, max_features=nfeatures,
                              max_df=max_df, min_df=min_df,
                              use_idf=True, ngram_range=(1, 2))
        docs = []
        for doc in X:
            docs.append(" ".join(doc))
        docs2 = [doc.replace("\t","").replace("\n","") for doc in docs]
        traindocs = docs2[:7999]
        X = vec.fit_transform(traindocs)
        testdocs = docs2[8000:9500]
        X_test = vec.transform(testdocs)
        tlabel = label[:7999]
        testl = label[8000:9500]
        if(check):
            lsa = TruncatedSVD(k2, algorithm = 'arpack')
            normalizer = Normalizer(copy=False)
            X = lsa.fit_transform(X)
            X = normalizer.fit_transform(X)
            X_test = lsa.transform(X_test)
            X_test = normalizer.transform(X_test)
        model.fit(X,tlabel)
        pred = model.predict(X_test)
        out.append(round(accuracy_score(testl, pred),2))
    print(out)
    print(np.mean(out))
Author: hurelyyu, Project: CS_Master_UW, Lines: 33, Source: TMClassCopy.py

Example 5: preprocess

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def preprocess(data, n_components, use_tf_idf=True):
    """
    Preprocess the data for clustering by running SVD and
    normalizing the results. This process is also known as
    LSA.

    arguments:
    data -- Dataset; if use_tf_idf is True the object must contain a
            tf_idf table alongside a raw frequencies dataframe.
    n_components -- int, the number of components to use for the SVD
                    a minimum of 100 is recommended.
    use_tf_idf -- bool, whether to use the tf-idf frequencies for the
                  preprocessing.

    returns:
    e -- float, a measure of variance explained by the SVD.
    X -- np.array, an array with the data reduced to n_components.
    """
    if use_tf_idf:
        d = data.tf_idf.as_matrix()
    else:
        d = data.df.as_matrix()
    svd = TruncatedSVD(n_components=n_components)
    X = svd.fit_transform(d)
    norm = Normalizer()

    # Record a measure of explained variance
    e = svd.explained_variance_ratio_.sum()*100
    return e, norm.fit_transform(X)  # normalize the SVD output, not the raw data
Author: marcomorucci, Project: Clustering-Constitutions, Lines: 31, Source: analyze.py
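Examples 2, 4, 5, and 15 all chain TruncatedSVD and Normalizer by hand. The same LSA preprocessing can be bundled with scikit-learn's make_pipeline, which keeps the fit/transform split between train and test data automatic; a sketch, with X_train_tfidf and X_test_tfidf as illustrative names:

from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

lsa = make_pipeline(TruncatedSVD(n_components=100), Normalizer(copy=False))
# X_train_lsa = lsa.fit_transform(X_train_tfidf)  # fit SVD on training data only
# X_test_lsa = lsa.transform(X_test_tfidf)        # reuse the fitted SVD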

Example 6: normalize_test

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def normalize_test():
    X = [[1, 2, 3, 4, 5, 2, 6, 8]]  # must be 2D: one sample with eight features
    from sklearn.preprocessing import Normalizer
    normalizer = Normalizer()
    X2 = normalizer.fit_transform(X)

    print(X2)
Author: swenker, Project: bigdata, Lines: 9, Source: scikit_lab.py

Example 7: _normalize

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
    def _normalize(self, X, y, X_t):
        from sklearn.preprocessing import Normalizer
        NORM = Normalizer()

        X = NORM.fit_transform(X, y)
        X_t = NORM.transform(X_t)

        return X, X_t
Author: mikbuch, Project: pymri, Lines: 10, Source: datasets.py

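A note on Example 7: Normalizer is stateless, so fit learns nothing from the data and fit_transform(X, y) is equivalent to transform(X). Reusing one instance for both train and test, as the code does, is therefore safe. A small demonstration with illustrative data:

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.arange(6, dtype=float).reshape(2, 3)
a = Normalizer().fit_transform(X)
b = Normalizer().fit(np.ones_like(X)).transform(X)  # "fit" on unrelated data
print(np.allclose(a, b))  # True: the fit step learns nothing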
Example 8: kmeans

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def kmeans(tfidf, svd, svd_trans, k=200, n_words=10):
    '''
    Performs k-means clustering on svd transformed data and plots it

    Args:
        tfidf: sklearn fitted TfidfVectorizer
        svd: sklearn fitted TruncatedSVD
        svd_trans: dense array with lsi transformed data
        k: the k in k-means
        n_words: number of top terms to display per cluster
    Returns:
        km: the fitted KMean object
    '''

    # spherical kmeans, so normalize
    normalizer = Normalizer()
    norm_data = normalizer.fit_transform(svd_trans)
    km = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=5,
                verbose=2)
    km.fit(norm_data)

    original_space_centroids = svd.inverse_transform(km.cluster_centers_)
    order_centroids = original_space_centroids.argsort()[:, ::-1]

    terms = tfidf.get_feature_names()
    terms = prettify(terms)
    terms = np.array(terms)
    fig = plt.figure(figsize=(10, 8))
    for i in range(10):
        print("Cluster {:d}:".format(i))
        for ind in order_centroids[i, :n_words]:
            print(' {:s}'.format(terms[ind]))
        print('\n')

        # Make a figure and axes with dimensions as desired.
        ax = fig.add_subplot(2, 5, i+1)
        ax.set_title('Cluster {:d}'.format(i+1))

        component = order_centroids[i]
        cmap = plt.cm.Purples
        mn = np.min(component[:n_words])
        mx = np.max(component[:n_words])
        norm = mpl.colors.Normalize(mn, mx)

        cb = mpl.colorbar.ColorbarBase(ax, cmap=cmap, norm=norm,
                                       orientation='vertical')
        # sorted_component = np.sort(component)
        colors = sns.color_palette('Purples', 9).as_hex()
        colors = np.repeat(colors[-1], n_words)

        cb.set_ticks(np.linspace(mn, mx, n_words+2)[1:-1])
        cb.ax.yaxis.set_tick_params(size=0)
        cb.ax.tick_params(labelsize=10)
        for color, tick in zip(colors, cb.ax.get_yticklabels()):
            tick.set_color(color)
            tick.set_fontsize(14)
        cb.set_ticklabels(np.array(terms)[order_centroids[i, :n_words][::-1]])
    plt.tight_layout()
    return km
Author: lwoloszy, Project: albumpitch, Lines: 60, Source: genres.py

Example 9: reduce_dimension

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
    def reduce_dimension(self, n_components=2):
        """ Return PCA transform of self.data, with n_components. """

        reducer = PCA(n_components=n_components)

        X = self.data.values

        norm = Normalizer()
        Xnorm = norm.fit_transform(X)

        return reducer.fit_transform(Xnorm)
Author: abshinn, Project: practice, Lines: 13, Source: pima.py

Example 10: make_nn_regression

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def make_nn_regression(n_samples=100, n_features=100, n_informative=10,
                       dense=False, noise=0.0, test_size=0,
                       normalize_x=True, normalize_y=True,
                       shuffle=True, random_state=None):

    X, y, w = _make_nn_regression(n_samples=n_samples,
                                  n_features=n_features,
                                  n_informative=n_informative,
                                  shuffle=shuffle,
                                  random_state=random_state)

    if dense:
        X = X.toarray()

    if test_size > 0:
        cv = ShuffleSplit(len(y), n_iter=1, random_state=random_state,
                          test_size=test_size, train_size=1-test_size)

        train, test = list(cv)[0]
        X_train, y_train = X[train], y[train]
        X_test, y_test = X[test], y[test]
        if not dense:
            X_train.sort_indices()
            X_test.sort_indices()
    else:
        X_train, y_train = X, y
        if not dense:
            X_train.sort_indices()
        X_test, y_test = None, None

    # Add noise
    if noise > 0.0:
        generator = check_random_state(random_state)
        y_train += generator.normal(scale=noise * np.std(y_train),
                                    size=y_train.shape)
        y_train = np.maximum(y_train, 0)

    if normalize_x:
        normalizer = Normalizer()
        X_train = normalizer.fit_transform(X_train)
        if X_test is not None:
            X_test = normalizer.transform(X_test)

    if normalize_y:
        scaler = MinMaxScaler()
        y_train = scaler.fit_transform(y_train.reshape(-1, 1)).ravel()
        if y_test is not None:
            y_test = scaler.transform(y_test.reshape(-1, 1)).ravel()

    if X_test is not None:
        return X_train, y_train, X_test, y_test, w
    else:
        return X_train, y_train, w
Author: RPGOne, Project: sebabulba, Lines: 55, Source: samples_generator.py
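Example 10 pairs Normalizer for X with MinMaxScaler for y, which highlights an easy-to-miss contrast: Normalizer rescales each sample (row) independently, while scalers such as MinMaxScaler rescale each feature (column) using statistics learned across samples. A small illustration:

import numpy as np
from sklearn.preprocessing import MinMaxScaler, Normalizer

X = np.array([[1.0, 10.0],
              [3.0, 30.0]])
print(Normalizer().fit_transform(X))    # row-wise: each row has unit L2 norm
print(MinMaxScaler().fit_transform(X))  # column-wise: each feature mapped to [0, 1]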

Example 11: get_tf_idf_M

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def get_tf_idf_M(M, tf="raw", idf="c", norm_samps=False):
    # tf options: "bin", "raw", "log", "dnorm"; idf options: "c", "smooth", "max", "prob"
    # (only tf="raw" and idf="c" are implemented below)
    N = len(M)
    if tf == "raw":
        tf_M = np.copy(M)  # just the frequency of the word in a text
#    #TODO: check if dnorm is implemented OK
#    elif tf == "dnorm":
#        tf_M = 0.5 + 0.5*(M/(np.amax(M, axis=1).reshape((N,1))))
    if idf == "c":
        idf_v = []
        for i in range(M.shape[1]): #get the number of texts that contain a word words[i]
            idf_v.append(np.count_nonzero(M[:,i])) #count the non zero values in columns of matrix M
        idf_v = np.array(idf_v)
        idf_v = np.log(N/idf_v)
    tf_idf_M = tf_M*idf_v
    if norm_samps:
        normalizer = Normalizer()
        tf_idf_M = normalizer.fit_transform(tf_idf_M)
#    np.savetxt("tf_idf_M_" + str(N) + ".txt", tf_idf_M , fmt="%s")
    return tf_idf_M
Author: RokIvansek, Project: Spectral-clustering-HW, Lines: 21, Source: newsgroups20.py
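The idf loop in Example 11 counts, for each term, the number of documents with a nonzero entry and then applies idf = log(N / df). Assuming M is a dense document-term count matrix as in the example, the loop collapses to one vectorized line:

import numpy as np

M = np.array([[2, 0, 1],
              [0, 0, 3],
              [1, 1, 0]])  # illustrative 3-documents x 3-terms count matrix
N = len(M)
idf_v = np.log(N / np.count_nonzero(M, axis=0))  # df per column, then log(N/df)
print(idf_v)  # same values the per-column loop would produce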

Example 12: lstm_validate

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def lstm_validate(lstm_model, evaluation_dataset, create_confusion_matrix=False, number_of_subframes=0, sample_strategy="random", batch_size=32):
	
	print("evaluate neural network...")
	validation_data = []
	validation_labels = []
	
	accuracy = 0
	n = 0
	idx = 0

	
	for _obj in evaluation_dataset:
		if number_of_subframes > 0:
			validation_data.append(get_buckets(_obj.get_hoj_set(), number_of_subframes, sample_strategy))
		else:
			validation_data.append(_obj.get_hoj_set())
		validation_labels.append(_obj.get_hoj_label()[0])


	# evaluate neural network
	score, acc = lstm_model.evaluate(np.array(validation_data), np.array(validation_labels), batch_size=batch_size, verbose=0)
			
	print("Accuracy:",acc)

	if create_confusion_matrix:
		predictions = lstm_model.predict(np.array(validation_data),batch_size = batch_size)
		
		predicted_labels = []
		real_labels = []

		for k in range(len(predictions)):
			predicted_idx = np.argmax(predictions[k])

			label_idx = np.argmax(validation_labels[k])
			
			real_labels.append(label_idx)
			predicted_labels.append(predicted_idx)


		cnf_matrix = confusion_matrix(real_labels, predicted_labels)

		norm = Normalizer()
		cnf_matrix = norm.fit_transform(cnf_matrix)

		return score, acc, cnf_matrix


	return score, acc, None
Author: Nudelreaktor, Project: pyNTU_HoJ_LSTM, Lines: 50, Source: lstm.py
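One caveat on Example 12: Normalizer defaults to the L2 norm, so each row of the confusion matrix ends up with unit Euclidean length. If the goal is per-class rates, rows summing to 1 are usually what's wanted, which is the L1 norm; newer scikit-learn versions can also do this directly via confusion_matrix(..., normalize='true'). A sketch:

import numpy as np
from sklearn.preprocessing import Normalizer

cnf = np.array([[8.0, 2.0],
                [1.0, 9.0]])
rates = Normalizer(norm='l1').fit_transform(cnf)  # each row sums to 1
print(rates)  # [[0.8, 0.2], [0.1, 0.9]]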

Example 13: FFTTransformer

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
class FFTTransformer(TransformerMixin, BaseEstimator):
    def __init__(self):
        self.model = LinearRegression()
        self.y_mean = None
        self.normalize = Normalizer()

    def fit(self, X, y=None):
        X_train = self.make_waves(X)
        y_train = numpy.array(y)
        self.y_mean = y_train.mean()
        self.model.fit(X_train, y_train - self.y_mean)
        return self

    def make_waves(self, X):
        X = X['times']
        time_scale = numpy.array([(time - X[0]).total_seconds() for time in X]).reshape(-1, 1)
        X_train = [
            numpy.concatenate((
                numpy.pi * 2.0 / (24 * 60 * 60) * delta,
                numpy.pi * 2.0 / (12 * 60 * 60) * delta,
                numpy.pi * 2.0 / (6 * 60 * 60) * delta,

                numpy.pi * 2.0 / (7 * 24 * 60 * 60) * delta,
                numpy.pi * 2.0 / (7.0 / 2 * 24 * 60 * 60) * delta,
                numpy.pi * 2.0 / (7.0 / 3 * 24 * 60 * 60) * delta,

                numpy.pi * 2.0 / (1380500.0) * delta,
                numpy.pi * 2.0 / (1380500.0 / 2) * delta,
                numpy.pi * 2.0 / (1380500.0 / 3) * delta), axis=0)
            for delta in time_scale]

        X_train = numpy.concatenate((numpy.sin(X_train), numpy.cos(X_train)), axis=1)
        return X_train

    def predict(self, X):
        X_test = self.make_waves(X)
        X_test = self.model.predict(X_test) + self.y_mean
        return X_test.reshape(-1, 1)

    def transform(self, X, y=None):
        X_test = self.predict(X)
        X_test = self.normalize.fit_transform(X_test)
        return X_test.reshape(-1, 1)
Author: wing00, Project: TwitchExMachina, Lines: 45, Source: transform.py

Example 14: perform_classification

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def perform_classification (corpus_dir, extn, embedding_fname, class_labels_fname):
    '''
    Perform graph classification using subgraph2vec embeddings.
    :param corpus_dir: folder containing subgraph2vec sentence files
    :param extn: extension of subgraph2vec sentence files
    :param embedding_fname: file containing subgraph vectors in word2vec format (refer Mikolov et al (2013) code)
    :param class_labels_fname: files containing labels of each graph
    :return: None
    '''
    gensim_model = gensim.models.KeyedVectors.load_word2vec_format(fname=embedding_fname)
    logging.info('Loaded gensim model of subgraph vectors')

    subgraph_vocab = sorted(gensim_model.vocab.keys())
    logging.info('Vocab consists of {} subgraph features'.format(len(subgraph_vocab)))

    wlk_files = get_files(corpus_dir, extn)
    logging.info('Loaded {} graph WL kernel files for performing classification'.format(len(wlk_files)))
    c_vectorizer = CountVectorizer(input='filename',
                                   tokenizer=subgraph2vec_tokenizer,
                                   lowercase=False,
                                   vocabulary=subgraph_vocab)
    normalizer = Normalizer()

    X = c_vectorizer.fit_transform(wlk_files)
    X = normalizer.fit_transform(X)
    logging.info('X (sample) matrix shape: {}'.format(X.shape))


    Y = np.array(get_class_labels(wlk_files, class_labels_fname))
    logging.info('Y (label) matrix shape: {}'.format(Y.shape))

    seed = randint(0, 1000)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=seed)
    logging.info('Train and Test matrix shapes: {}, {}, {}, {} '.format(X_train.shape, X_test.shape,
                                                                        Y_train.shape, Y_test.shape))

    linear_kernel_svm_classify(X_train, X_test, Y_train, Y_test)

    subgraph_kernel = get_subgraph_kernel(gensim_model, subgraph_vocab)
    deep_kernel_svm_classify(X_train, X_test, Y_train, Y_test, subgraph_kernel)
Author: SongFGH, Project: subgraph2vec_tf, Lines: 42, Source: classify.py
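Example 14 feeds the sparse output of CountVectorizer straight into fit_transform; Normalizer accepts scipy.sparse matrices natively and returns a sparse result, so the document-term matrix never needs densifying. A minimal sketch with an illustrative two-document corpus:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import Normalizer

docs = ["cat dog dog", "dog fish"]
X = CountVectorizer().fit_transform(docs)  # scipy.sparse CSR matrix
X = Normalizer().fit_transform(X)          # stays sparse; rows unit-normalized
print(type(X), X.toarray())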

Example 15: vectorize

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit_transform [as alias]
def vectorize(n, comp=0):
    tfv = TfidfVectorizer(min_df=1, strip_accents='unicode', ngram_range=(1,2), stop_words='english',
        sublinear_tf=True, use_idf=True, smooth_idf=True)

    # Fit and transform
    X = tfv.fit_transform(boiler_stream(trainfnm, n))
    lsa = None
    scaler = None
    if comp > 0:
        lsa = TruncatedSVD(comp)
        scaler = Normalizer(copy=False)
        X = lsa.fit_transform(X)
        X = scaler.fit_transform(X)

    # Transform only
    Z = tfv.transform(boiler_stream(testfnm, n))
    if lsa:
        Z = lsa.transform(Z)
        Z = scaler.transform(Z)
    
    np.save(trainvecfnm, X)
    np.save(testvecfnm, Z)
Author: damonzon, Project: kaggle, Lines: 24, Source: vectorize.py


Note: The sklearn.preprocessing.Normalizer.fit_transform method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their authors, and copyright in the source code remains with those authors. For distribution and use, please refer to the corresponding project's license. Do not repost without permission.