This article collects typical usage examples of the sklearn.decomposition.TruncatedSVD.transform method in Python. If you are wondering what TruncatedSVD.transform does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples of the class it belongs to, sklearn.decomposition.TruncatedSVD.
A total of 15 code examples of the TruncatedSVD.transform method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
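Before diving into the collected examples, the minimal sketch below shows the basic pattern they all share: fit TruncatedSVD on a training matrix, then call transform to project both the training matrix and any other matrix with the same number of columns onto the learned components. This sketch is not taken from any of the examples on this page; the matrix shapes, densities, and component counts are arbitrary assumed values chosen only for illustration.
# Minimal usage sketch (illustrative only; all shapes and parameters are assumptions)
from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

X_train = sparse_random(100, 50, density=0.1, random_state=0)  # sparse 100 x 50 training matrix
X_test = sparse_random(20, 50, density=0.1, random_state=1)    # must have the same 50 columns

svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_train)                       # learn the components from the training data only
X_train_red = svd.transform(X_train)   # dense array of shape (100, 10)
X_test_red = svd.transform(X_test)     # dense array of shape (20, 10)
print(X_train_red.shape, X_test_red.shape)
All 15 examples below follow this same fit-then-transform pattern; they differ mainly in how the input matrix is built (TF-IDF, count vectors, CSR matrices) and in which model consumes the reduced features.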
Example 1: buildKB16
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def buildKB16(n_comp=200, seed_value=123):
    ## data
    # read the training/test data
    print('Importing Data')
    xtrain = pd.read_csv('../input/xtrain_kb6099.csv')
    xtest = pd.read_csv('../input/xtest_kb6099.csv')

    # separate the ID and target columns from the features
    id_train = xtrain.ID; xtrain.drop('ID', axis=1, inplace=True)
    ytrain = xtrain.target; xtrain.drop('target', axis=1, inplace=True)
    id_test = xtest.ID; xtest.drop('ID', axis=1, inplace=True)

    # fit SVD on the training features, then project train and test
    svd = TruncatedSVD(n_components=n_comp, n_iter=5, random_state=seed_value)
    svd.fit(xtrain)
    xtrain = svd.transform(xtrain)
    xtest = svd.transform(xtest)

    ## store the results
    # add indices etc
    xtrain = pd.DataFrame(xtrain)
    xtrain['ID'] = id_train
    xtrain['target'] = ytrain

    xtest = pd.DataFrame(xtest)
    xtest['ID'] = id_test

    # save the files
    xtrain.to_csv('../input/xtrain_kb16c' + str(n_comp) + '.csv', index=False, header=True)
    xtest.to_csv('../input/xtest_kb16c' + str(n_comp) + '.csv', index=False, header=True)
    return
Example 2: main
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def main():
    print('Loading full...')
    small = load_matrix('training_matrix_full.txt', 100000)  # generated from cluster.py
    tsvd = TruncatedSVD(5000)
    tsvd.fit(small)  # fit the SVD on the full matrix before calling transform
    print('Running knn...')
    train = load_matrix('training_matrix.txt')
    normalize(train, copy=False)
    print('Loaded training data')
    test = load_matrix('testing_matrix.txt')
    normalize(test, copy=False)
    print('Loaded testing data')
    train = tsvd.transform(train)
    test = tsvd.transform(test)
    train_tags = load_matrix('training_tags.txt')
    print('Loaded training tags')
    test_tags = load_matrix('testing_tags.txt')
    print('Loaded testing tags')
    print('Testing SVM...')
    output = sgd(train, test, train_tags, test_tags)
    printStats(output, test_tags)
Example 3: TfIdf
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
class TfIdf(Feature):
    def __init__(self):
        self.kbest = None
        self.vect = None
        self.truncated = None
        self.normalizer = None

    def train(self, reviews, labels):
        self.vect = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')
        reviews_text = [' '.join(list(chain.from_iterable(review))) for review in reviews]
        tfidf_matrix = self.vect.fit_transform(reviews_text).toarray()
        self.truncated = TruncatedSVD(n_components=50)
        self.truncated.fit(tfidf_matrix, labels)
        trunc = self.truncated.transform(tfidf_matrix)
        self.normalizer = Normalizer()
        self.normalizer.fit(trunc)
        self.kbest = SelectKBest(f_classif, k=5)
        self.kbest.fit(self.normalizer.transform(trunc), labels)

    def score(self, data):
        reviews_text = ' '.join(list(chain.from_iterable(data)))
        tfidf_matrix = self.vect.transform([reviews_text]).toarray()
        trunc = self.truncated.transform(tfidf_matrix)
        return tuple(self.kbest.transform(self.normalizer.transform(trunc))[0, :])
Example 4: benchmark
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def benchmark(k, epochs):
    print("*" * 80)
    print("k: %d, epochs: %d\n" % (k, epochs))

    # select = SelectKBest(score_func=chi2, k=k)
    select = TruncatedSVD(n_components=k)
    X_train_trunc = select.fit_transform(X_train, Y_train)
    X_test_trunc = select.transform(X_test)
    print('done truncating')

    clf = DBN([X_train_trunc.shape[1], k, 4], learn_rates=0.3, learn_rate_decays=0.9, epochs=epochs, verbose=1)
    clf.fit(X_train_trunc, Y_train)
    pred = clf.predict(X_test_trunc)

    if CREATE_SUBMISSION:
        X_submit_trunc = select.transform(X_submit)
        pred_submit = clf.predict(X_submit_trunc)
        dump_csv(pred_submit, k, epochs)

    score = metrics.f1_score(Y_test, pred)
    print("f1-score: %0.3f" % score)
    print("classification report:")
    print(metrics.classification_report(Y_test, pred))
    print("confusion matrix:")
    print(metrics.confusion_matrix(Y_test, pred))
Example 5: train_pca_svm
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def train_pca_svm(learning_data, pca_dims, probability=True, cache_size=3000, **svm_kwargs):
    (X_train, y_train, train_ids), (X_test, y_test, test_ids) = learning_data
    pca = TruncatedSVD(n_components=pca_dims)
    n_symbols = max(
        np.max(X_train) + 1, np.max(X_test) + 1
    )
    logger.info("Forming CSR Matrices")
    x_train, x_test = create_csr_matrix(X_train, n_symbols), create_csr_matrix(X_test, n_symbols)
    logger.info("Starting PCA")
    # pseudo-supervised PCA: fit on positive class only
    pca = pca.fit(x_train[y_train > 0])
    x_train_pca = pca.transform(x_train)
    x_test_pca = pca.transform(x_test)
    logger.info("Starting SVM")
    svc = SVC(probability=probability, cache_size=cache_size, **svm_kwargs)
    svc.fit(x_train_pca, y_train)
    logger.info("Scoring SVM")
    score = svc.score(x_test_pca, y_test)
    logger.info(score)
    svc.test_score = score
    pca.n_symbols = n_symbols
    return svc, pca, x_train_pca, x_test_pca
Example 6: benchmark
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def benchmark(k, epochs):
    print("*" * 80)
    print("k: %d, epochs: %d\n" % (k, epochs))

    # select = SelectKBest(score_func=chi2, k=k)
    select = TruncatedSVD(n_components=k)
    X_train_trunc = select.fit_transform(X_train, Y_train)
    X_test_trunc = select.transform(X_test)
    print('done truncating')

    parameters = {'C': [1, 10, 100, 1000, 10000], 'class_weight': ['auto', None], 'tol': [0.001, 0.0001]}
    clf = LinearSVC(C=100000)
    # clf = grid_search.GridSearchCV(svc, parameters)
    clf.fit(X_train_trunc, Y_train)
    pred = clf.predict(X_test_trunc)

    if CREATE_SUBMISSION:
        X_submit_trunc = select.transform(X_submit)
        pred_submit = clf.predict(X_submit_trunc)
        dump_csv(pred_submit, k, epochs)

    score = metrics.f1_score(Y_test, pred)
    print("f1-score: %0.3f" % score)
    print("classification report:")
    print(metrics.classification_report(Y_test, pred))
    print("confusion matrix:")
    print(metrics.confusion_matrix(Y_test, pred))
Example 7: train_manual
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def train_manual():
    with open("../data/f_hashtag_prediction/train_data_tweets_processed_0_to_500K.txt") as ftrain:
        with open("../data/f_hashtag_prediction/test_data_tagged_processed_manual.txt") as ftest:
            test_set = ftest.read().splitlines()
            train_set = ftrain.read().splitlines()

            # vectorizer = CountVectorizer()
            vectorizer = TfidfVectorizer(min_df=5, max_df=500, max_features=None,
                                         strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                                         ngram_range=(1, 4), use_idf=1, smooth_idf=1, sublinear_tf=1,
                                         stop_words='english')
            # vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform(train_set)
            print(tfidf_matrix.shape)
            smatrix = vectorizer.transform(test_set)
            print(smatrix.shape)

            svd = TruncatedSVD(n_components=500, random_state=42)
            svd.fit(tfidf_matrix)
            truncated_train_svd = svd.transform(tfidf_matrix)
            truncated_test_svd = svd.transform(smatrix)
            print(truncated_train_svd.shape)
            print(truncated_test_svd.shape)

            cosine = cosine_similarity(truncated_test_svd[0], truncated_train_svd)
            print(cosine)
            print("TEST SET: ")
Example 8: train
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def train():
    with open("../data/f_hashtag_prediction/train_data_tweets_processed_0_to_500K.txt") as ftrain:
        with open("../data/f_hashtag_prediction/test_data_tweets_processed_2K.txt") as ftest:
            test_set = ftest.read().splitlines()
            train_set = ftrain.read().splitlines()

            # vectorizer = CountVectorizer()
            vectorizer = TfidfVectorizer(min_df=5, max_df=500, max_features=None,
                                         strip_accents='unicode', analyzer='word', token_pattern=r'\w{1,}',
                                         ngram_range=(1, 4), use_idf=1, smooth_idf=1, sublinear_tf=1,
                                         stop_words='english')
            # vectorizer = TfidfVectorizer()
            tfidf_matrix = vectorizer.fit_transform(train_set)
            print(tfidf_matrix.shape)
            # print(tfidf_matrix)
            # print(vectorizer.fixed_vocabulary_)
            smatrix = vectorizer.transform(test_set)
            print(smatrix.shape)
            joblib.dump(smatrix, "test_tfidf_matrix.o")
            joblib.dump(tfidf_matrix, "train_tfidf_matrix.o")

            svd = TruncatedSVD(n_components=500, random_state=42)
            svd.fit(tfidf_matrix)
            truncated_train_svd = svd.transform(tfidf_matrix)
            truncated_test_svd = svd.transform(smatrix)
            print(truncated_train_svd.shape)
            print(truncated_test_svd.shape)
            joblib.dump(truncated_train_svd, "truncated_train_svd.o")
            joblib.dump(truncated_test_svd, "truncated_test_svd.o")

            print("TEST SET: ")
            test_index = 0
Example 9: test_singular_values
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def test_singular_values():
    # Check that the TruncatedSVD output has the correct singular values
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 80
    X = rng.randn(n_samples, n_features)

    apca = TruncatedSVD(n_components=2, algorithm='arpack',
                        random_state=rng).fit(X)
    rpca = TruncatedSVD(n_components=2, algorithm='randomized',
                        random_state=rng).fit(X)
    assert_array_almost_equal(apca.singular_values_, rpca.singular_values_, 12)

    # Compare to the Frobenius norm
    X_apca = apca.transform(X)
    X_rpca = rpca.transform(X)
    assert_array_almost_equal(np.sum(apca.singular_values_**2.0),
                              np.linalg.norm(X_apca, "fro")**2.0, 12)
    assert_array_almost_equal(np.sum(rpca.singular_values_**2.0),
                              np.linalg.norm(X_rpca, "fro")**2.0, 12)

    # Compare to the 2-norms of the score vectors
    assert_array_almost_equal(apca.singular_values_,
                              np.sqrt(np.sum(X_apca**2.0, axis=0)), 12)
    assert_array_almost_equal(rpca.singular_values_,
                              np.sqrt(np.sum(X_rpca**2.0, axis=0)), 12)

    # Set the singular values and see what we get back
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 110
    X = rng.randn(n_samples, n_features)

    apca = TruncatedSVD(n_components=3, algorithm='arpack',
                        random_state=rng)
    rpca = TruncatedSVD(n_components=3, algorithm='randomized',
                        random_state=rng)
    X_apca = apca.fit_transform(X)
    X_rpca = rpca.fit_transform(X)

    X_apca /= np.sqrt(np.sum(X_apca**2.0, axis=0))
    X_rpca /= np.sqrt(np.sum(X_rpca**2.0, axis=0))
    X_apca[:, 0] *= 3.142
    X_apca[:, 1] *= 2.718
    X_rpca[:, 0] *= 3.142
    X_rpca[:, 1] *= 2.718

    X_hat_apca = np.dot(X_apca, apca.components_)
    X_hat_rpca = np.dot(X_rpca, rpca.components_)
    apca.fit(X_hat_apca)
    rpca.fit(X_hat_rpca)
    assert_array_almost_equal(apca.singular_values_, [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(rpca.singular_values_, [3.142, 2.718, 1.0], 14)
Example 10: perform_emsamble_model
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def perform_emsamble_model():
    # get data from csv file
    x, y_votes, y_comments, y_views, lat = read_train_data()
    # transform to numpy arrays for easier handling
    y_votes = np.array(y_votes)
    y_comments = np.array(y_comments)
    y_views = np.array(y_views)
    # get test data
    x_test, ids, lat = read_test_data()

    # Change the parameters of these objects to the values found by grid search
    vec_votes = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)
    vec_comments = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)
    vec_views = CountVectorizer(stop_words=None, strip_accents='unicode', analyzer='word', ngram_range=(1, 2), min_df=2)

    # transform x and x_test into term matrices for feeding to the classifiers
    x_votes = vec_votes.fit_transform(x)
    x_comments = vec_comments.fit_transform(x)
    x_views = vec_views.fit_transform(x)
    x_test_transformed_votes = vec_votes.transform(x_test)
    x_test_transformed_comments = vec_comments.transform(x_test)
    x_test_transformed_views = vec_views.transform(x_test)
    print("Term matrices generated")

    print("LSA transforming")
    lsa_votes = TruncatedSVD(500)
    lsa_comments = TruncatedSVD(500)
    lsa_views = TruncatedSVD(500)
    x_votes = lsa_votes.fit_transform(x_votes)
    print("LSA Votes done..")
    x_comments = lsa_comments.fit_transform(x_comments)
    print("LSA Comments done..")
    x_views = lsa_views.fit_transform(x_views)
    print("LSA Views done..")
    x_test_transformed_votes = lsa_votes.transform(x_test_transformed_votes)
    x_test_transformed_comments = lsa_comments.transform(x_test_transformed_comments)
    x_test_transformed_views = lsa_views.transform(x_test_transformed_views)
    print("LSA finished..")

    ada_votes = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_comments = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_views = AdaBoostClassifier(base_estimator=RandomForestClassifier())
    ada_votes.fit(x_votes, y_votes)
    ada_comments.fit(x_comments, y_comments)
    ada_views.fit(x_views, y_views)
    print("Fitting done")

    # predict number of votes, comments and views
    pred_votes = ada_votes.predict(x_test_transformed_votes)
    pred_comments = ada_comments.predict(x_test_transformed_comments)
    pred_views = ada_views.predict(x_test_transformed_views)
    # generate submission response csv file
    create_csv_response(len(x_test), ids, pred_views, pred_votes, pred_comments)
Example 11: retrain
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def retrain(svdcomp):
    smatrix = joblib.load("test_tfidf_matrix.o")
    tfidf_matrix = joblib.load("train_tfidf_matrix.o")
    svd = TruncatedSVD(n_components=svdcomp, random_state=42)
    svd.fit(tfidf_matrix)
    truncated_train_svd = svd.transform(tfidf_matrix)
    truncated_test_svd = svd.transform(smatrix)
    print(truncated_train_svd.shape)
    print(truncated_test_svd.shape)
    joblib.dump(truncated_train_svd, "truncated_train_svd_" + str(svdcomp) + ".o")
    joblib.dump(truncated_test_svd, "truncated_test_svd_" + str(svdcomp) + ".o")
Example 12: fit
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def fit(self, user_feature_matrix, product_feature_matrix):
    """
    Fit latent factors to the user-feature matrix through truncated SVD,
    then get item representations by projecting onto the latent feature
    space.
    """
    nrm = lambda x: normalize(x.astype(np.float64), norm='l2', axis=1)
    svd = TruncatedSVD(n_components=self.dim)
    svd.fit(nrm(user_feature_matrix))
    self.user_factors = svd.transform(nrm(user_feature_matrix))
    self.item_factors = svd.transform(nrm(product_feature_matrix))
Example 13: pca
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def pca(devMatrix, trainMatrix, devtarget, traintarget):
    print('Running decomposition')
    svd = TruncatedSVD(n_components=1000)
    # trainMatrixTrans = svd.fit_transform(trainMatrix)
    # devMatrixTrans = svd.fit_transform(devMatrix)
    svd.fit(trainMatrix)
    trainMatrixTrans = svd.transform(trainMatrix)
    svd.fit(devMatrix)
    devMatrixTrans = svd.transform(devMatrix)
    print('End Decomposition')
    # gradientBoost(devMatrixTrans, trainMatrixTrans, devtarget, traintarget)
    supportVectorMachine(devMatrixTrans, trainMatrixTrans, devtarget, traintarget)
Example 14: apply_lsi
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def apply_lsi(train_data, test_data):
    """
    :param train_data: train dataset data
    :param test_data: testing dataset data
    :return: apply LSI on TFxIDF matrices and return transformed matrices
    """
    logger.info("Performing LSI on TFxIDF Matrices")

    if os.path.isfile("../Dataset/Train_LSI.pkl") and os.path.isfile("../Dataset/Test_LSI.pkl"):  # load pickle files if they exist
        logger.info("TFxIDF Matrices located at ../Dataset. Loading.")
        train_lsi = cPickle.load(open("../Dataset/Train_LSI.pkl", "r"))
        test_lsi = cPickle.load(open("../Dataset/Test_LSI.pkl", "r"))
    else:
        svd = TruncatedSVD(n_components=50)  # LSI applied with k=50
        train_lsi = svd.fit_transform(train_data)
        test_lsi = svd.transform(test_data)
        logger.info("TFxIDF Matrices Transformed")
        logger.info("Dumping TFxLSI Matrices to ../Dataset/")
        cPickle.dump(train_lsi, open("../Dataset/Train_LSI.pkl", "wb"))
        cPickle.dump(test_lsi, open("../Dataset/Test_LSI.pkl", "wb"))

    logger.info("Size of Transformed Training Dataset: {0}".format(train_lsi.shape))
    logger.info("Size of Transformed Testing Dataset: {0}".format(test_lsi.shape))
    return train_lsi, test_lsi
Example 15: t_svd_dummies
# Required import: from sklearn.decomposition import TruncatedSVD [as alias]
# Or: from sklearn.decomposition.TruncatedSVD import transform [as alias]
def t_svd_dummies(features):
    x_train = csr_matrix(hstack((
        features.title_tf_idf_train,
    )))
    x_test = csr_matrix(hstack((
        features.title_tf_idf_test,
    )))

    svd = TruncatedSVD(n_components=200, n_iter=5)
    x_train = svd.fit_transform(x_train)
    x_test = svd.transform(x_test)

    x_train = np.hstack((
        x_train,
        features.features_train,
        features.train_query_dummies
    ))
    x_test = np.hstack((
        x_test,
        features.features_test,
        features.test_query_dummies
    ))

    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    y_train = features.train['median_relevance'].values
    return x_train, x_test, y_train