Python decomposition.TruncatedSVD Class Code Examples

This article collects typical usage examples of the Python class sklearn.decomposition.TruncatedSVD. If you have been wondering what exactly TruncatedSVD does, how to use it, or what working code with it looks like, the curated class examples below should help.


Below are 15 code examples of the TruncatedSVD class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
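
Before the collected samples, here is a minimal, self-contained sketch of the pattern that recurs in almost all of them: vectorize text into a sparse matrix, then reduce it with TruncatedSVD. The toy corpus and the n_components value are illustrative only and are not taken from any of the projects below.

# Minimal TruncatedSVD sketch; the corpus and n_components are illustrative.
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer

docs = [
    "truncated svd works directly on sparse matrices",
    "latent semantic analysis uses truncated svd",
    "svd reduces the dimensionality of tf-idf features",
]

# TF-IDF yields a sparse matrix; unlike PCA, TruncatedSVD accepts it as-is.
X = TfidfVectorizer().fit_transform(docs)

svd = TruncatedSVD(n_components=2, random_state=0)
X_reduced = svd.fit_transform(X)  # dense array of shape (n_docs, 2)

print(X_reduced.shape)
print(svd.explained_variance_ratio_.sum())  # fraction of variance retained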

Example 1: tfIDFeats

from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler

def tfIDFeats(ids, data):
    # The familiar TF-IDF vectorizer
    tfv = TfidfVectorizer(min_df=3, max_features=None,
                          strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                          ngram_range=(1, 5), use_idf=1, smooth_idf=1, sublinear_tf=1,
                          stop_words='english')
    # Fit TF-IDF and transform the documents
    tfv.fit(data)
    X = tfv.transform(data)

    # Initialize SVD (applied below only when there are more than 350 features)
    svd = TruncatedSVD(n_components=350)

    # Initialize the standard scaler; with_mean=False keeps sparse input supported
    scl = StandardScaler(with_mean=False)

    if X.shape[1] > 350:
        X = svd.fit_transform(X)
    X = scl.fit_transform(X)
    # plotData is a module-level flag in the source project
    if plotData:
        X = PCA(n_components=2).fit_transform(X)
    return (X, ids)
Developer: mostafaelaraby, Project: articles-clustering, Lines: 26, Source: clusterRelated.py

Example 2: find_k

    def find_k(self, rank=None, max_clusters=1, vertline=None):
        # Method of a class that stores the document-term matrix in self.X.
        # Optionally reduce to `rank` dimensions with LSA before clustering.
        if rank is not None:
            svd = TruncatedSVD(rank)
            self.X = svd.fit_transform(self.X)
            self.X = Normalizer(copy=False).fit_transform(self.X)

        k_range = range(1, max_clusters)
        clusters = [KMeans(n_clusters=k).fit(self.X) for k in k_range]
        centroids = [cluster.cluster_centers_ for cluster in clusters]
        k_cosine = [cdist(self.X, cent, metric='cosine') for cent in centroids]
        dist = [np.min(k_cos, axis=1) for k_cos in k_cosine]

        wcss = [sum(d[~np.isnan(d)]**2) for d in dist]  # within-cluster sum of squares
        tss = sum(pdist(self.X)**2) / self.X.shape[0]   # total sum of squares
        bss = tss - np.array(wcss)                      # explained (between-cluster) variance

        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.set_size_inches(10, 3)
        plt.tight_layout()

        ax1.set_title('BSS')
        ax1.plot(np.arange(1, len(bss) + 1), bss)
        ax1.scatter(np.arange(1, len(bss) + 1), bss)
        ax2.set_title('WCSS')
        ax2.plot(np.arange(1, len(wcss) + 1), wcss)
        ax2.scatter(np.arange(1, len(wcss) + 1), wcss)
        if vertline is not None:
            plt.axvline(vertline, c='red', alpha=0.75)

        plt.show()
Developer: hugsnotpugs, Project: WhoReadsXKCD, Lines: 30, Source: ScreePlots.py

Example 3: test_feature_union

def test_feature_union():
    # basic sanity check for feature union
    iris = load_iris()
    X = iris.data
    X -= X.mean(axis=0)
    y = iris.target
    svd = TruncatedSVD(n_components=2, random_state=0)
    select = SelectKBest(k=1)
    fs = FeatureUnion([("svd", svd), ("select", select)])
    fs.fit(X, y)
    X_transformed = fs.transform(X)
    assert_equal(X_transformed.shape, (X.shape[0], 3))

    # check if it does the expected thing
    assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X))
    assert_array_equal(X_transformed[:, -1],
                       select.fit_transform(X, y).ravel())

    # test if it also works for sparse input
    # We use a different svd object to control the random_state stream
    fs = FeatureUnion([("svd", svd), ("select", select)])
    X_sp = sparse.csr_matrix(X)
    X_sp_transformed = fs.fit_transform(X_sp, y)
    assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())

    # test setting parameters
    fs.set_params(select__k=2)
    assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4))

    # test it works with transformers missing fit_transform
    fs = FeatureUnion([("mock", TransfT()), ("svd", svd), ("select", select)])
    X_transformed = fs.fit_transform(X, y)
    assert_equal(X_transformed.shape, (X.shape[0], 8))
Developer: Givonaldo, Project: scikit-learn, Lines: 33, Source: test_pipeline.py

Example 4: train_manual

def train_manual():
    with open("../data/f_hashtag_prediction/train_data_tweets_processed_0_to_500K.txt") as ftrain:
        with open("../data/f_hashtag_prediction/test_data_tagged_processed_manual.txt") as ftest:
            test_set = ftest.read().splitlines()
            train_set = ftrain.read().splitlines()
            # vectorizer = CountVectorizer()
            vectorizer = TfidfVectorizer(min_df=5, max_df=500, max_features=None,
                                         strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                                         ngram_range=(1, 4), use_idf=1, smooth_idf=1, sublinear_tf=1,
                                         stop_words='english')
            # vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform(train_set)
            print(tfidf_matrix.shape)

            smatrix = vectorizer.transform(test_set)
            print(smatrix.shape)

            # Fit the SVD on the training matrix, then project train and test
            svd = TruncatedSVD(n_components=500, random_state=42)
            svd.fit(tfidf_matrix)
            truncated_train_svd = svd.transform(tfidf_matrix)
            truncated_test_svd = svd.transform(smatrix)

            print(truncated_train_svd.shape)
            print(truncated_test_svd.shape)

            # Reshape the single test row to 2-D, as cosine_similarity expects
            cosine = cosine_similarity(truncated_test_svd[0].reshape(1, -1), truncated_train_svd)
            print(cosine)

        print("TEST SET: ")
Developer: rudraksh125, Project: socialmedia, Lines: 29, Source: tfidf.py

Example 5: cook

def cook():
    # load_data() and weighted() are helpers defined in the source project
    x, y, weights = load_data()
    n_components = 200
    svd = TruncatedSVD(n_components, random_state=42)
    x_unweighted = svd.fit_transform(x)
    x_weighted = svd.fit_transform(weighted(x, weights))

    for i in range(9):
        frac = 1 - (i * 0.01 + 0.01)
        print(frac)

        x_train, x_test, y_train, y_test = train_test_split(x_unweighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print("Unweighted: ", classifier.score(x_test, y_test))

        x_train, x_test, y_train, y_test = train_test_split(x_weighted, y, test_size=frac)
        classifier = AdaBoostClassifier(n_estimators=100)
        classifier.fit(x_train, y_train)
        print("Weighted: ", classifier.score(x_test, y_test))

        print('--------------------------')
Developer: wangchr, Project: eMeriL, Lines: 25, Source: cook.py

Example 6: SVD_CV

def SVD_CV(counts, scores, n_comp=range(10, 611, 100)):
    # Average out-of-sample MSE over n_avg random train/test splits
    n_avg = 16
    avg_err = []
    for n in range(n_avg):
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            counts, scores, test_size=0.2, random_state=n)
        test_err = []
        for n_c in n_comp:
            TruncTrans = TruncatedSVD(n_components=n_c)
            X_trunc_train = TruncTrans.fit_transform(X_train)
            # linear_model() is a helper in the source project that returns
            # a fitted regressor
            regr = linear_model(X_trunc_train, y_train)
            X_trunc_test = TruncTrans.transform(X_test)
            y_pred = regr.predict(X_trunc_test) * 10**(-12) + 3
            test_err.append(metrics.mean_squared_error(y_test, y_pred))

        if not avg_err:
            avg_err = test_err
        else:
            avg_err = [avg_err[i] + test_err[i] * (1.0 / n_avg) for i in range(len(test_err))]

    plt.plot(n_comp, avg_err, label='Out-of-Sample Error')
    plt.xlabel('n components')
    plt.ylabel('MSE')
    plt.show()
Developer: kacunningham413, Project: MetaShoeReview, Lines: 28, Source: Metric_Models.py

Example 7: kfold

def kfold(agetext, k, model, k2):
    import collections
    out = []
    for i in range(k):
        print("iteration: " + str(i))
        agetext = shuffle(agetext)
        datatb = agetext.iloc[:, 1:]
        label = agetext["agegroup"].tolist()
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            datatb, label, test_size=0.15, random_state=i * 6)
        data = X_train.values
        counter = collections.Counter(y_train)
        print(counter)
        testdata = X_test.values
        # LSA: reduce with TruncatedSVD, then normalize rows to unit length
        lsa = TruncatedSVD(k2, algorithm='arpack')
        normalizer = Normalizer(copy=False)
        X = lsa.fit_transform(data)
        X = normalizer.fit_transform(X)
        X_test = lsa.transform(testdata)
        X_test = normalizer.transform(X_test)
        model.fit(X, y_train)
        pred = model.predict(X_test)
        counter = collections.Counter(y_test)
        print(counter)
        counter = collections.Counter(pred)
        print(counter)
        out.append(round(accuracy_score(y_test, pred), 5))
    print(str(out))
    print(np.mean(out))
Developer: hurelyyu, Project: CS_Master_UW, Lines: 29, Source: AgeGroup.py

Example 8: test_sparse_formats

def test_sparse_formats(fmt):
    # X, Xdense and n_samples are fixtures defined in sklearn's test module
    Xfmt = Xdense if fmt == "dense" else getattr(X, "to" + fmt)()
    tsvd = TruncatedSVD(n_components=11)
    Xtrans = tsvd.fit_transform(Xfmt)
    assert_equal(Xtrans.shape, (n_samples, 11))
    Xtrans = tsvd.transform(Xfmt)
    assert_equal(Xtrans.shape, (n_samples, 11))
Developer: AlexisMignon, Project: scikit-learn, Lines: 7, Source: test_truncated_svd.py

Example 9: compute_svd

def compute_svd(Xs):
    # Compute the first latent component (PCA-like, except TruncatedSVD
    # does not center the data first)
    svd = TruncatedSVD(n_components=1, n_iter=20, random_state=0)
    svd.fit(Xs)
    pc = svd.components_
    print(pc.shape, svd.explained_variance_ratio_)
    return pc
Developer: andra-pumnea, Project: Thesis, Lines: 7, Source: weight_embeddings.py

Example 10: lsa_summarizer

def lsa_summarizer(text, num_sen=5):
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sentenceTokens = sent_detector.tokenize(text.strip())

    # tokenizeText is a tokenizer helper defined in the source project
    tfvectorizer = TfidfVectorizer(tokenizer=tokenizeText)
    sparse = tfvectorizer.fit_transform(sentenceTokens).A

    # Project every sentence onto the first latent concept
    lsa = TruncatedSVD(n_components=1)
    concept = lsa.fit_transform(sparse)

    pos = np.arange(len(sentenceTokens))

    listlist = [list(x) for x in zip(sentenceTokens, concept, pos)]

    # Keep the num_sen sentences scoring highest on the concept,
    # then restore their original order in the text
    listlist.sort(key=lambda x: x[1], reverse=True)
    summarysentences = listlist[0:num_sen]
    summarysentences.sort(key=lambda x: x[2])

    summary = ""
    for n in range(num_sen):
        summary += ' ' + summarysentences[n][0]
    summary = " ".join(summary.replace(u"\xa0", u" ").strip().split())

    return summary
Developer: kenndanielso, Project: summarizer_app, Lines: 25, Source: summarizer.py

Example 11: fit_document_matrix

    def fit_document_matrix(self, X):
        """
        Reduce the dimensionality of sparse matrix X
        using Latent Semantic Analysis and
        build a nearest-neighbor model

        Parameters
        ----------
        X: sparse csr matrix, sparse term-frequency matrix or
            other weighting matrix computed from documents
        """
        n_components = self.n_components
        n_iter = self.n_iter
        algorithm = self.algorithm
        lsa_model = TruncatedSVD(n_components=n_components,
                                 n_iter=n_iter,
                                 algorithm=algorithm)
        # reduce dimension using Latent Semantic Analysis
        vectors = lsa_model.fit_transform(X)
        self.vectors = vectors

        # build nearest neighbor model
        nbrs_model = build_nearest_neighbors(vectors, n_recommend=self.n_recommend)
        self.nbrs_model = nbrs_model

        return self
Developer: KarimJedda, Project: science_concierge, Lines: 26, Source: science_concierge.py

Example 12: basic_lsi

def basic_lsi(df, n_components=200, max_df=0.5, min_df=5):
    '''
    Basic LSI model for album recommendations

    Args:
        df: dataframe with Pitchfork reviews
        n_components: number of lsi dimensions
        max_df: max_df in TfidfVectorizer
        min_df: min_df in TfidfVectorizer
    Returns:
        tfidf: sklearn fitted TfidfVectorizer
        tfidf_trans: sparse matrix with tfidf transformed data
        svd: sklearn fitted TruncatedSVD
        svd_trans: dense array with lsi transformed data

    '''

    X = df['review']
    stopwords = nltk.corpus.stopwords.words('english')

    tfidf = TfidfVectorizer(stop_words=stopwords,
                            max_df=max_df, min_df=min_df)
    tfidf_trans = tfidf.fit_transform(X)

    svd = TruncatedSVD(n_components=n_components)
    svd_trans = svd.fit_transform(tfidf_trans)

    return tfidf, tfidf_trans, svd, svd_trans
Developer: lwoloszy, Project: albumpitch, Lines: 28, Source: eda.py

Example 13: buildKB16

def buildKB16(n_comp=200, seed_value=123):
    ## data
    # read the training/test data
    print('Importing Data')
    xtrain = pd.read_csv('../input/xtrain_kb6099.csv')
    xtest = pd.read_csv('../input/xtest_kb6099.csv')

    # separate ids and target from the features
    id_train = xtrain.ID; xtrain.drop('ID', axis=1, inplace=True)
    ytrain = xtrain.target; xtrain.drop('target', axis=1, inplace=True)
    id_test = xtest.ID; xtest.drop('ID', axis=1, inplace=True)

    # fit SVD on the training features, then project both sets
    svd = TruncatedSVD(n_components=n_comp, n_iter=5, random_state=seed_value)
    svd.fit(xtrain)
    xtrain = svd.transform(xtrain)
    xtest = svd.transform(xtest)

    ## store the results
    # add indices etc
    xtrain = pd.DataFrame(xtrain)
    xtrain['ID'] = id_train
    xtrain['target'] = ytrain

    xtest = pd.DataFrame(xtest)
    xtest['ID'] = id_test

    # save the files
    xtrain.to_csv('../input/xtrain_kb16c' + str(n_comp) + '.csv', index=False, header=True)
    xtest.to_csv('../input/xtest_kb16c' + str(n_comp) + '.csv', index=False, header=True)

    return
Developer: mpearmain, Project: bnp, Lines: 35, Source: build_datasets.py

Example 14: truncatedSVD

def truncatedSVD(data, labels, new_dimension):
    # labels is accepted but not used by this reducer
    print("start truncatedSVD...")
    start = time.time()
    svd = TruncatedSVD(n_components=new_dimension)
    reduced = svd.fit_transform(data)
    end = time.time()
    return (reduced, end - start)
Developer: sebastian-alfers, Project: master-thesis, Lines: 7, Source: dimensionality_reduction.py

Example 15: test_inverse_transform

def test_inverse_transform(algo):
    # X and Xdense are fixtures defined in sklearn's test module.
    # We need a lot of components for the reconstruction to be "almost
    # equal" in all positions. XXX Test means or sums instead?
    tsvd = TruncatedSVD(n_components=52, random_state=42, algorithm=algo)
    Xt = tsvd.fit_transform(X)
    Xinv = tsvd.inverse_transform(Xt)
    assert_array_almost_equal(Xinv, Xdense, decimal=1)
Developer: AlexisMignon, Project: scikit-learn, Lines: 7, Source: test_truncated_svd.py


Note: the sklearn.decomposition.TruncatedSVD class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and distribution and use are governed by each project's License. Do not reproduce this article without permission.