

Python TruncatedSVD.transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.decomposition.TruncatedSVD.transform. If you are wondering what TruncatedSVD.transform does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further examples of the containing class, sklearn.decomposition.TruncatedSVD.


Below are 15 code examples of TruncatedSVD.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python examples.
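Before diving into the individual examples, here is a minimal sketch (synthetic data, illustrative parameter values) of the pattern nearly all of them share: fit the SVD on the training matrix only, then call transform on both the training and test matrices so they land in the same latent space.

import numpy as np
from sklearn.decomposition import TruncatedSVD

rng = np.random.RandomState(0)
X_train = rng.rand(100, 40)   # stands in for a TF-IDF or count matrix
X_test = rng.rand(20, 40)

svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_train)                      # learn components from the training data only
X_train_svd = svd.transform(X_train)  # (100, 10)
X_test_svd = svd.transform(X_test)    # (20, 10), projected into the same space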

Example 1: buildKB16

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def buildKB16(n_comp=200, seed_value=123):
    ## data
    # read the training/test data  
    print('Importing Data')
    xtrain = pd.read_csv('../input/xtrain_kb6099.csv')
    xtest = pd.read_csv('../input/xtest_kb6099.csv')
    
    # separate 
    id_train = xtrain.ID; xtrain.drop('ID', axis = 1, inplace = True)
    ytrain = xtrain.target; xtrain.drop('target', axis = 1, inplace = True)
    id_test = xtest.ID; xtest.drop('ID', axis = 1, inplace = True)
    
    # fit SVD
    svd = TruncatedSVD(n_components=n_comp, n_iter=5, random_state=seed_value)
    svd.fit(xtrain)
    xtrain = svd.transform(xtrain)
    xtest = svd.transform(xtest)
    xtrain = pd.DataFrame(xtrain)
    xtest = pd.DataFrame(xtest)

    ## store the results
    # add indices etc
    xtrain['ID'] = id_train
    xtrain['target'] = ytrain
    xtest['ID'] = id_test

    # save the files
    xtrain.to_csv('../input/xtrain_kb16c' + str(n_comp) + '.csv', index=False, header=True)
    xtest.to_csv('../input/xtest_kb16c' + str(n_comp) + '.csv', index=False, header=True)
    
    return
Developer: mpearmain | Project: bnp | Lines of code: 37 | Source file: build_datasets.py

Example 2: main

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def main():
    print('Loading full...')
    # matrix generated by cluster.py; used here to fit the SVD
    small = load_matrix('training_matrix_full.txt', 100000)

    tsvd = TruncatedSVD(5000)
    tsvd.fit(small)  # fit the SVD before calling transform below

    print('Running knn...')
    train = load_matrix('training_matrix.txt')
    normalize(train, copy=False)
    print('Loaded training data')
    test = load_matrix('testing_matrix.txt')
    normalize(test, copy=False)
    print('Loaded testing data')

    train = tsvd.transform(train)
    test = tsvd.transform(test)

    train_tags = load_matrix('training_tags.txt')
    print('Loaded training tags')
    test_tags = load_matrix('testing_tags.txt')
    print('Loaded testing tags')

    print('Testing SVM...')
    output = sgd(train, test, train_tags, test_tags)
    printStats(output, test_tags)
Developer: niangaotuantuan | Project: TagPrediction | Lines of code: 32 | Source file: KNN.py

Example 3: TfIdf

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
class TfIdf(Feature):
    def __init__(self):
        self.kbest = None
        self.vect = None
        self.truncated = None
        self.normalizer = None

    def train(self, reviews, labels):
        self.vect = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')

        reviews_text = [' '.join(list(chain.from_iterable(review))) for review in reviews]
        tfidf_matrix = self.vect.fit_transform(reviews_text).toarray()

        self.truncated = TruncatedSVD(n_components=50)
        self.truncated.fit(tfidf_matrix, labels)

        trunc = self.truncated.transform(tfidf_matrix)
        self.normalizer = Normalizer()
        self.normalizer.fit(trunc)

        self.kbest = SelectKBest(f_classif, k=5)
        self.kbest.fit(self.normalizer.transform(trunc), labels)

    def score(self, data):
        reviews_text = ' '.join(list(chain.from_iterable(data)))
        tfidf_matrix = self.vect.transform([reviews_text]).toarray()

        trunc = self.truncated.transform(tfidf_matrix)

        return tuple(self.kbest.transform(self.normalizer.transform(trunc))[0, :])
Developer: EdwardBetts | Project: Yulp | Lines of code: 32 | Source file: tfidf.py
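A hypothetical driver for the TfIdf class above, with synthetic reviews (each review a list of tokenized sentences, matching what chain.from_iterable expects). The vocabulary and sample counts are inflated only so that TruncatedSVD(n_components=50) has enough features and rows to work with; it assumes the class and its imports (TfidfVectorizer, Normalizer, SelectKBest, f_classif, chain) are in scope.

import random

random.seed(0)
vocab = ['word%d' % i for i in range(200)]
reviews = [[[random.choice(vocab) for _ in range(8)] for _ in range(3)]
           for _ in range(60)]
labels = [i % 2 for i in range(60)]

feature = TfIdf()
feature.train(reviews, labels)    # fits vectorizer, SVD, normalizer and k-best in order
print(feature.score(reviews[0]))  # tuple of the 5 selected latent features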

Example 4: benchmark

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def benchmark(k, epochs):
  print("*" * 80)
  print("k: %d, epochs: %d\n" % (k, epochs))

  #select = SelectKBest(score_func=chi2, k=k)
  select = TruncatedSVD(n_components=k)
  X_train_trunc = select.fit_transform(X_train, Y_train)
  X_test_trunc = select.transform(X_test)

  print('done truncating')

  clf = DBN([X_train_trunc.shape[1], k, 4], learn_rates=0.3, learn_rate_decays=0.9, epochs=epochs, verbose=1)
  clf.fit(X_train_trunc, Y_train)
  pred = clf.predict(X_test_trunc)

  if CREATE_SUBMISSION:
    X_submit_trunc = select.transform(X_submit)
    pred_submit = clf.predict(X_submit_trunc)
    dump_csv(pred_submit, k, epochs)

  score = metrics.f1_score(Y_test, pred)
  print("f1-score:   %0.3f" % score)

  print("classification report:")
  print(metrics.classification_report(Y_test, pred))

  print("confusion matrix:")
  print(metrics.confusion_matrix(Y_test, pred))
Developer: alireza-saberi | Project: Applied_MachineLearning_COMP_598_MiniProject2 | Lines of code: 30 | Source file: dbn_test.py

Example 5: train_pca_svm

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def train_pca_svm(learning_data, pca_dims, probability=True, cache_size=3000, **svm_kwargs):
    (X_train, y_train, train_ids), (X_test, y_test, test_ids) = learning_data

    pca = TruncatedSVD(n_components=pca_dims)
    n_symbols = max(
        np.max(X_train) + 1, np.max(X_test) + 1
    )
    logger.info("Forming CSR Matrices")
    x_train, x_test = create_csr_matrix(X_train, n_symbols), create_csr_matrix(X_test, n_symbols)
    logger.info("Starting PCA")
    # pseudo-supervised PCA: fit on positive class only
    pca = pca.fit(x_train[y_train > 0])

    x_train_pca = pca.transform(x_train)
    x_test_pca = pca.transform(x_test)

    logger.info("Starting SVM")
    svc = SVC(probability=probability, cache_size=cache_size, **svm_kwargs)
    svc.fit(x_train_pca, y_train)
    logger.info("Scoring SVM")
    score = svc.score(x_test_pca, y_test)
    logger.info(score)
    svc.test_score = score
    pca.n_symbols = n_symbols
    return svc, pca, x_train_pca, x_test_pca
Developer: bhtucker | Project: chatnet | Lines of code: 27 | Source file: svm_model.py

Example 6: benchmark

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def benchmark(k, epochs):
  print("*" * 80)
  print("k: %d, epochs: %d\n" % (k, epochs))

  #select = SelectKBest(score_func=chi2, k=k)
  select = TruncatedSVD(n_components=k)
  X_train_trunc = select.fit_transform(X_train, Y_train)
  X_test_trunc = select.transform(X_test)

  print('done truncating')

  parameters = {'C': [1, 10, 100, 1000, 10000],  'class_weight': ['auto', None], 'tol':[0.001,0.0001]}
  clf = LinearSVC(C=100000)
  #clf = grid_search.GridSearchCV(svc, parameters)
  clf.fit(X_train_trunc, Y_train)
  pred = clf.predict(X_test_trunc)

  if CREATE_SUBMISSION:
    X_submit_trunc = select.transform(X_submit)
    pred_submit = clf.predict(X_submit_trunc)
    dump_csv(pred_submit, k, epochs)

  score = metrics.f1_score(Y_test, pred)
  print("f1-score:   %0.3f" % score)

  print("classification report:")
  print(metrics.classification_report(Y_test, pred))

  print("confusion matrix:")
  print(metrics.confusion_matrix(Y_test, pred))
Developer: alireza-saberi | Project: Applied_MachineLearning_COMP_598_MiniProject2 | Lines of code: 32 | Source file: svm_test.py

Example 7: train_manual

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def train_manual():
    with open("../data/f_hashtag_prediction/train_data_tweets_processed_0_to_500K.txt") as ftrain:
        with open("../data/f_hashtag_prediction/test_data_tagged_processed_manual.txt") as ftest:
            test_set = ftest.read().splitlines()
            train_set = ftrain.read().splitlines()
            # vectorizer = CountVectorizer()
            vectorizer = TfidfVectorizer(min_df=5, max_df=500, max_features=None,
                                         strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                                         ngram_range=(1, 4), use_idf=1, smooth_idf=1, sublinear_tf=1,
                                         stop_words='english')
            # vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform(train_set)
            print(tfidf_matrix.shape)

            smatrix = vectorizer.transform(test_set)
            print(smatrix.shape)

            svd = TruncatedSVD(n_components=500, random_state=42)
            svd.fit(tfidf_matrix)
            truncated_train_svd = svd.transform(tfidf_matrix)
            truncated_test_svd = svd.transform(smatrix)

            print(truncated_train_svd.shape)
            print(truncated_test_svd.shape)

            # slice with 0:1 to keep a 2-D row vector for cosine_similarity
            cosine = cosine_similarity(truncated_test_svd[0:1], truncated_train_svd)
            print(cosine)

        print("TEST SET: ")
Developer: rudraksh125 | Project: socialmedia | Lines of code: 31 | Source file: tfidf.py

Example 8: train

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def train():
    with open("../data/f_hashtag_prediction/train_data_tweets_processed_0_to_500K.txt") as ftrain:
        with open("../data/f_hashtag_prediction/test_data_tweets_processed_2K.txt") as ftest:
            test_set = ftest.read().splitlines()
            train_set = ftrain.read().splitlines()
            # vectorizer = CountVectorizer()
            vectorizer = TfidfVectorizer(min_df=5, max_df=500, max_features=None,
                                         strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                                         ngram_range=(1, 4), use_idf=1, smooth_idf=1, sublinear_tf=1,
                                         stop_words='english')
            # vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform(train_set)
            print(tfidf_matrix.shape)
            # print(tfidf_matrix)
            # print(vectorizer.fixed_vocabulary_)
            smatrix = vectorizer.transform(test_set)
            print(smatrix.shape)

            joblib.dump(smatrix, "test_tfidf_matrix.o")
            joblib.dump(tfidf_matrix, "train_tfidf_matrix.o")

            svd = TruncatedSVD(n_components=500, random_state=42)
            svd.fit(tfidf_matrix)
            truncated_train_svd = svd.transform(tfidf_matrix)
            truncated_test_svd = svd.transform(smatrix)

            print(truncated_train_svd.shape)
            print(truncated_test_svd.shape)

            joblib.dump(truncated_train_svd, "truncated_train_svd.o")
            joblib.dump(truncated_test_svd, "truncated_test_svd.o")

        print("TEST SET: ")
        test_index = 0
Developer: rudraksh125 | Project: socialmedia | Lines of code: 36 | Source file: tfidf.py

Example 9: test_singular_values

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def test_singular_values():
    # Check that the TruncatedSVD output has the correct singular values

    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80

    X = rng.randn(n_samples, n_features)

    apca = TruncatedSVD(n_components=2, algorithm='arpack',
                        random_state=rng).fit(X)
    rpca = TruncatedSVD(n_components=2, algorithm='randomized',
                        random_state=rng).fit(X)
    assert_array_almost_equal(apca.singular_values_, rpca.singular_values_, 12)

    # Compare to the Frobenius norm
    X_apca = apca.transform(X)
    X_rpca = rpca.transform(X)
    assert_array_almost_equal(np.sum(apca.singular_values_**2.0),
                              np.linalg.norm(X_apca, "fro")**2.0, 12)
    assert_array_almost_equal(np.sum(rpca.singular_values_**2.0),
                              np.linalg.norm(X_rpca, "fro")**2.0, 12)

    # Compare to the 2-norms of the score vectors
    assert_array_almost_equal(apca.singular_values_,
                              np.sqrt(np.sum(X_apca**2.0, axis=0)), 12)
    assert_array_almost_equal(rpca.singular_values_,
                              np.sqrt(np.sum(X_rpca**2.0, axis=0)), 12)

    # Set the singular values and see what we get back
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 110

    X = rng.randn(n_samples, n_features)

    apca = TruncatedSVD(n_components=3, algorithm='arpack',
                        random_state=rng)
    rpca = TruncatedSVD(n_components=3, algorithm='randomized',
                        random_state=rng)
    X_apca = apca.fit_transform(X)
    X_rpca = rpca.fit_transform(X)

    X_apca /= np.sqrt(np.sum(X_apca**2.0, axis=0))
    X_rpca /= np.sqrt(np.sum(X_rpca**2.0, axis=0))
    X_apca[:, 0] *= 3.142
    X_apca[:, 1] *= 2.718
    X_rpca[:, 0] *= 3.142
    X_rpca[:, 1] *= 2.718

    X_hat_apca = np.dot(X_apca, apca.components_)
    X_hat_rpca = np.dot(X_rpca, rpca.components_)
    apca.fit(X_hat_apca)
    rpca.fit(X_hat_rpca)
    assert_array_almost_equal(apca.singular_values_, [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(rpca.singular_values_, [3.142, 2.718, 1.0], 14)
Developer: AlexisMignon | Project: scikit-learn | Lines of code: 58 | Source file: test_truncated_svd.py
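Why the Frobenius-norm and column-norm assertions in the first half of this test hold: transform returns the score matrix, so in the notation of the truncated SVD

\[
X \approx U_k \Sigma_k V_k^{\top}, \qquad
\mathrm{transform}(X) = X V_k = U_k \Sigma_k,
\]
\[
\lVert U_k \Sigma_k \rVert_F^2
  = \operatorname{tr}\!\bigl(\Sigma_k U_k^{\top} U_k \Sigma_k\bigr)
  = \operatorname{tr}\!\bigl(\Sigma_k^2\bigr)
  = \sum_{i=1}^{k} \sigma_i^2,
\]

and because the columns of \(U_k\) are orthonormal, the 2-norm of the \(i\)-th column of \(U_k \Sigma_k\) is exactly \(\sigma_i\).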

Example 10: perform_emsamble_model

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def perform_emsamble_model():
    # get data from csv file
    x, y_votes, y_comments, y_views, lat = read_train_data()
    # convert to numpy arrays for easier handling
    y_votes = np.array(y_votes)
    y_comments = np.array(y_comments)
    y_views = np.array(y_views)
    # get test data
    x_test, ids, lat = read_test_data()
    # parameter values below were chosen via grid search
    vec_votes = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)
    vec_comments = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)
    vec_views = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)
    # transform x and x_test into term-count matrices to feed the classifiers
    x_votes = vec_votes.fit_transform(x)
    x_comments = vec_comments.fit_transform(x)
    x_views = vec_views.fit_transform(x)
    x_test_transformed_votes = vec_votes.transform(x_test)
    x_test_transformed_comments = vec_comments.transform(x_test)
    x_test_transformed_views = vec_views.transform(x_test)
    print("Count matrices generated")
    print("LSA transforming")
    lsa_votes = TruncatedSVD(500)
    lsa_comments = TruncatedSVD(500)
    lsa_views = TruncatedSVD(500)
    x_votes = lsa_votes.fit_transform(x_votes)
    print("LSA votes done...")
    x_comments = lsa_comments.fit_transform(x_comments)
    print("LSA comments done...")
    x_views = lsa_views.fit_transform(x_views)
    print("LSA views done...")
    x_test_transformed_votes = lsa_votes.transform(x_test_transformed_votes)
    x_test_transformed_comments = lsa_comments.transform(x_test_transformed_comments)
    x_test_transformed_views = lsa_views.transform(x_test_transformed_views)
    print("LSA finished...")
    ada_votes = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_comments = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_views = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_votes.fit(x_votes, y_votes)
    ada_comments.fit(x_comments, y_comments)
    ada_views.fit(x_views, y_views)
    print("Fitting done")
    # predict votes, comments and views for the test set
    pred_votes = ada_votes.predict(x_test_transformed_votes)
    pred_comments = ada_comments.predict(x_test_transformed_comments)
    pred_views = ada_views.predict(x_test_transformed_views)
    # generate submission response csv file
    create_csv_response(len(x_test), ids, pred_views, pred_votes, pred_comments)
Developer: gabrielfarah | Project: Kaggle | Lines of code: 54 | Source file: main.py

Example 11: retrain

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def retrain(svdcomp):
    smatrix = joblib.load("test_tfidf_matrix.o")
    tfidf_matrix = joblib.load("train_tfidf_matrix.o")

    svd = TruncatedSVD(n_components=svdcomp, random_state=42)
    svd.fit(tfidf_matrix)
    truncated_train_svd = svd.transform(tfidf_matrix)
    truncated_test_svd = svd.transform(smatrix)

    print(truncated_train_svd.shape)
    print(truncated_test_svd.shape)

    joblib.dump(truncated_train_svd, "truncated_train_svd_" + str(svdcomp) + ".o")
    joblib.dump(truncated_test_svd, "truncated_test_svd_" + str(svdcomp) + ".o")
Developer: rudraksh125 | Project: socialmedia | Lines of code: 16 | Source file: tfidf.py

Example 12: fit

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
    def fit(self, user_feature_matrix, product_feature_matrix):
        """
        Fit latent factors to the user-feature matrix through truncated SVD,
        then get item representations by projecting onto the latent feature
        space.
        """

        nrm = lambda x: normalize(x.astype(np.float64), norm='l2', axis=1)

        svd = TruncatedSVD(n_components=self.dim)
        svd.fit(nrm(user_feature_matrix))

        self.user_factors = svd.transform(nrm(user_feature_matrix))
        self.item_factors = svd.transform(nrm(product_feature_matrix))
Developer: hbudyanto | Project: lightfm-paper | Lines of code: 16 | Source file: lsiup_model.py
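As a hypothetical follow-up (not part of the original snippet): once fit has run, user-item affinity scores reduce to dot products between the two factor matrices. Here `model` stands for an instance of the surrounding class.

import numpy as np

scores = model.user_factors.dot(model.item_factors.T)  # shape (n_users, n_items)
top10 = np.argsort(-scores[0])[:10]                    # ten best-scoring items for user 0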

Example 13: pca

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def pca(devMatrix, trainMatrix, devtarget, traintarget):

    print('Running decomposition')
    svd = TruncatedSVD(n_components=1000)
    #trainMatrixTrans = svd.fit_transform(trainMatrix)
    #devMatrixTrans = svd.fit_transform(devMatrix)

    # fit once on the training data, then project both sets into the same
    # latent space (refitting on devMatrix would yield incompatible spaces)
    svd.fit(trainMatrix)
    trainMatrixTrans = svd.transform(trainMatrix)
    devMatrixTrans = svd.transform(devMatrix)
    print('End Decomposition')
    #gradientBoost(devMatrixTrans, trainMatrixTrans, devtarget, traintarget)
    supportVectorMachine(devMatrixTrans, trainMatrixTrans, devtarget, traintarget)
Developer: katymccl3 | Project: MachineLearning | Lines of code: 16 | Source file: dataParser.py

Example 14: apply_lsi

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def apply_lsi(train_data, test_data):
    """
    :param train_data: train dataset data
    :param test_data: testing dataset data
    :return: apply LSI on TFxIDF matrices and return transformed matrices
    """

    logger.info("Performing LSI on TFxIDF Matrices")

    if os.path.isfile("../Dataset/Train_LSI.pkl") and os.path.isfile("../Dataset/Test_LSI.pkl"):  # load pickle files if they exist
        logger.info("TFxIDF Matrices located at ../Dataset. Loading.")
        train_lsi = cPickle.load(open("../Dataset/Train_LSI.pkl", "rb"))
        test_lsi = cPickle.load(open("../Dataset/Test_LSI.pkl", "rb"))

    else:
        svd = TruncatedSVD(n_components=50)  # LSI applied with k=50
        train_lsi = svd.fit_transform(train_data)
        test_lsi = svd.transform(test_data)

        logger.info("TFxIDF Matrices Transformed")
        logger.info("Dumping TFxLSI Matrices to ../Dataset/")
        cPickle.dump(train_lsi,open("../Dataset/Train_LSI.pkl", "wb"))
        cPickle.dump(test_lsi,open("../Dataset/Test_LSI.pkl", "wb"))

    logger.info("Size of Transformed Training Dataset: {0}".format(train_lsi.shape))
    logger.info("Size of Transformed Testing Dataset: {0}".format(test_lsi.shape))

    return train_lsi, test_lsi
Developer: RonakSumbaly | Project: EE239AS-Signal-and-Systems | Lines of code: 30 | Source file: utility.py

Example 15: t_svd_dummies

# Required import: from sklearn.decomposition import TruncatedSVD
# Method demonstrated: TruncatedSVD.transform
def t_svd_dummies(features):
    x_train = csr_matrix(hstack((
                features.title_tf_idf_train,
              )))

    x_test = csr_matrix(hstack((
                features.title_tf_idf_test,
              )))

    svd = TruncatedSVD(n_components=200, n_iter=5)
    x_train = svd.fit_transform(x_train)
    x_test = svd.transform(x_test)

    x_train = np.hstack((
        x_train,
        features.features_train,
        features.train_query_dummies
    ))
    x_test = np.hstack((
        x_test,
        features.features_test,
        features.test_query_dummies
    ))

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    y_train = features.train['median_relevance'].values

    return x_train, x_test, y_train
Developer: drsmithization | Project: kaggle_public | Lines of code: 33 | Source file: extractors.py
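As a design note, the fit_transform/transform split above is what keeps the SVD components and scaler statistics derived from the training rows only. A simplified sketch of the same discipline using a sklearn Pipeline (synthetic sparse input; the dense hand-crafted features and query dummies from the original are omitted):

from scipy.sparse import random as sparse_random
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

X_train = sparse_random(100, 300, density=0.05, format='csr', random_state=0)
X_test = sparse_random(30, 300, density=0.05, format='csr', random_state=1)

reducer = make_pipeline(TruncatedSVD(n_components=20, n_iter=5, random_state=0),
                        StandardScaler())
Z_train = reducer.fit_transform(X_train)  # components and scaling fitted here only
Z_test = reducer.transform(X_test)        # test data reuses the fitted statistics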


Note: the sklearn.decomposition.TruncatedSVD.transform examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other open-source code and documentation platforms, with the snippets selected from open-source projects contributed by many developers. Copyright of the source code remains with the original authors; consult the corresponding project's license before distributing or using the code. Do not reproduce without permission.