本文整理汇总了Python中tsne.bh_sne函数的典型用法代码示例。如果您正苦于以下问题：Python bh_sne函数的具体用法？Python bh_sne怎么用？Python bh_sne使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了bh_sne函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_tsne_gather_feat
def extract_tsne_gather_feat(stage):
    """
    Extract tsne gather features.

    Reads ``tmp2/df_w2vlem_join.csv`` and derives 2-D t-SNE coordinates for
    the 't_w2v' column, for 't_w2v' and 'q_w2v' stacked side by side, and for
    the 'd_w2v' column. A single TF-IDF vectorizer, fitted on the three
    columns together, is shared by all of them. The final table is written
    to ``tmp2/df_tsne_gather_feats.csv``.

    Note: python2 only.
    Better than func:extract_tsne_feat in cv, but worst in submission.

    Parameters
    ----------
    stage : int
        First step to execute. Every step guarded by ``stage <= k`` with
        ``k >= stage`` runs; earlier steps are skipped and their cached
        output under ``tmp2/`` is loaded instead.
        1: fit and pickle the TF-IDF vectorizer,
        2: 't_w2v' features,
        3: 't_w2v' + 'q_w2v' features,
        4: 'd_w2v' features and the final CSV.
    """
    df_w2vlem_join = pd.read_csv('tmp2/df_w2vlem_join.csv', index_col=0)
    n_rows = len(df_w2vlem_join)
    # Created unconditionally: it used to be built only inside the
    # ``stage <= 1`` branch, so ``stage == 2`` failed with a NameError when
    # the first t-SNE column was assigned.
    df_feat = pd.DataFrame(index=df_w2vlem_join.index.values)
    if stage <= 1:
        tfidf = TfidfVectorizer(ngram_range=(2,4), stop_words='english', min_df=2)
        # Dump each text column to its own file so the vectorizer can be
        # fitted on all three through input='filename'.
        df_w2vlem_join['t_w2v'].to_csv('tmp2/t_w2v', index=False)
        df_w2vlem_join['q_w2v'].to_csv('tmp2/q_w2v', index=False)
        df_w2vlem_join['d_w2v'].to_csv('tmp2/d_w2v', index=False)
        tfidf.set_params(input='filename')
        tfidf.fit(['tmp2/t_w2v','tmp2/q_w2v','tmp2/d_w2v'])
        # Switch back so later transform() calls accept in-memory strings.
        tfidf.set_params(input='content')
        with open('tmp2/tfidf_obj', 'wb') as tfidf_file:
            cPickle.dump(tfidf, tfidf_file)
    with open('tmp2/tfidf_obj', 'rb') as tfidf_file:
        tfidf = cPickle.load(tfidf_file)
    # X_t is needed by both step 2 and step 3, so it is always computed.
    X_t = tfidf.transform(df_w2vlem_join['t_w2v'].tolist())
    if stage <= 2:
        svd = TruncatedSVD(n_components=100, random_state=2016)
        X_svd = svd.fit_transform(X_t)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_t_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_t_2'] = X_tsne[:n_rows, 1]
        df_feat.to_csv('tmp2/tsne_t', index=False)
    # Reload the step-2 checkpoint (also the entry point when stage > 2).
    df_feat = pd.read_csv('tmp2/tsne_t')
    if stage <= 3:
        print(df_feat)
        X_q = tfidf.transform(df_w2vlem_join['q_w2v'].tolist())
        X_tq = sp.hstack([X_t, X_q]).tocsr()
        svd = TruncatedSVD(n_components=50, random_state=2016)
        X_svd = svd.fit_transform(X_tq)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_qt_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_qt_2'] = X_tsne[:n_rows, 1]
        df_feat.to_csv('tmp2/tsne_qt', index=False)
    # Reload the step-3 checkpoint (also the entry point when stage > 3).
    df_feat = pd.read_csv('tmp2/tsne_qt')
    if stage <= 4:
        print(df_feat)
        X_d = tfidf.transform(df_w2vlem_join['d_w2v'].tolist())
        svd = TruncatedSVD(n_components=100, random_state=2016)
        X_svd = svd.fit_transform(X_d)
        X_scaled = StandardScaler().fit_transform(X_svd)
        X_tsne = bh_sne(X_scaled)
        df_feat['tsne_desc_1'] = X_tsne[:n_rows, 0]
        df_feat['tsne_desc_2'] = X_tsne[:n_rows, 1]
        df_tsne_feats = df_feat
        df_tsne_feats.to_csv('tmp2/df_tsne_gather_feats.csv')
示例2: test_seed
def test_seed():
    """Check that two bh_sne runs seeded identically give identical output."""
    from tsne import bh_sne
    from sklearn.datasets import load_iris
    import numpy as np

    dataset = load_iris()
    features = dataset.data
    targets = dataset.target  # read as in the original; not used by the check

    def _embed():
        # A fresh RandomState(0) per call, so both runs start from the same seed.
        return bh_sne(features, random_state=np.random.RandomState(0), copy_data=True)

    first_run = _embed()
    second_run = _embed()
    assert np.all(first_run == second_run)
示例3: fit_transform
def fit_transform(self, X):
    """Embed the rows of a dataframe with Barnes-Hut t-SNE.

    The input is first passed to ``self._check_dataframe`` and then handed
    to ``tsne.bh_sne``; the embedding is returned as a new dataframe.

    Parameters
    ----------
    X : pandas.DataFrame
        A (n_samples, n_features) dataframe to both fit and transform

    Returns
    -------
    pandas.DataFrame
        The output of ``bh_sne(X)``, indexed like ``X``.

    Raises
    ------
    ValueError
        Presumably raised by ``self._check_dataframe`` when the input is not
        a pandas DataFrame (that helper is not visible here -- confirm).
    """
    from tsne import bh_sne

    self._check_dataframe(X)
    embedding = bh_sne(X)
    return pd.DataFrame(embedding, index=X.index)
示例4: t_sne
def t_sne(obj):
    # Group the rows of ``obj`` by category, print the size of each group and
    # then scatter-plot a t-SNE embedding.
    #
    # Python 2 only (uses print statements).
    #
    # NOTE(review): the indentation of this snippet was lost and has been
    # reconstructed from the syntax; in particular, the placement of the last
    # four statements (array/bh_sne/scatter/show) at function level is a guess.
    # NOTE(review): as written the function takes one parameter but calls
    # itself with two below, and builds an array from the ``data_categories``
    # dict itself -- this looks like two functions (a grouping one and a
    # plotting ``t_sne(data, labels)``) merged together. Confirm against the
    # upstream source before relying on it.
    p = parser()
    data_categories = {}   # category -> list of rows with the first element dropped
    label_categories = {}  # category -> list of colour codes, parallel to the above
    for d in obj:
        for c in p.categories_item(d):
            if c not in data_categories:
                data_categories[c] = []
                label_categories[c] = []
            # d[0] selects the colour ('g' when it equals 1, otherwise 'r');
            # the remaining elements are kept as the data row.
            data_categories[c].append(d[1:])
            label_categories[c].append('g' if d[0] == 1 else 'r')
    print len(data_categories)
    for c in data_categories:
        print '------------------------'
        print '%s (%d)' % (c, len(data_categories[c]))
        print '------------------------'
        # Only categories with more than 100 rows are passed on.
        if len(data_categories[c]) > 100:
            # NOTE(review): two positional arguments, but this function
            # accepts only ``obj`` -- this call raises TypeError as written.
            t_sne(data_categories[c], label_categories[c])
        else:
            print 'small dimensionality'
    # NOTE(review): ``data_categories`` and ``label_categories`` are dicts
    # here, not a numeric matrix / colour list -- verify intent.
    arr = np.array(data_categories, dtype=np.float64)
    x2 = bh_sne(arr)
    plt.scatter(x2[:, 0], x2[:, 1], c=label_categories)
    plt.show()
示例5: getTsne
def getTsne(modelFile, outDir, NBOW2=True):
    """Write a 2-D t-SNE embedding of the model's word vectors to disk.

    Loads ``modelFile`` with ``numpy.load``, reduces the 'Wemb' array to 50
    dimensions with PCA, runs ``bh_sne`` on the result and writes one
    "x y" line per row to ``<outDir>/tsne``. When ``NBOW2`` is true, it also
    writes ``sigmoid(dot(row, av))`` for every row of 'Wemb' to
    ``<outDir>/wts``, where ``av`` is the first row of the transposed 'AVs'
    array.

    Parameters
    ----------
    modelFile : path or file object accepted by ``numpy.load``
        Archive holding 'Wemb' and, if ``NBOW2`` is set, 'AVs'.
    outDir : str
        Existing directory that receives the ``tsne`` and ``wts`` files.
    NBOW2 : bool, optional
        Whether to also compute and write the per-row weights.
    """
    pp = numpy.load(modelFile)
    wv = pp['Wemb'].copy()
    sklearn_pca = PCA(n_components=50)
    Y_sklearn = sklearn_pca.fit_transform(wv)
    # numpy.asfarray was removed in NumPy 2.0; asarray with an explicit
    # float64 dtype is the documented replacement.
    Y_sklearn = numpy.asarray(Y_sklearn, dtype=numpy.float64)
    print("PCA transformation done ...")
    print("Waitig for t-SNE computation ...")
    reduced_vecs = bh_sne(Y_sklearn)
    # The ``with`` block closes the file; the former bare ``out.close``
    # (without parentheses) did nothing and has been dropped.
    with open(outDir + "/tsne", "w") as out:
        for row in reduced_vecs:
            out.write(str(row[0]) + " " + str(row[1]) + "\n")
    print("t-SNE written to file ...")
    if NBOW2:
        av = pp['AVs'].astype('float64').T[0]
        wts = [sigmoid(numpy.dot(vec, av)) for vec in wv]
        with open(outDir + "/wts", "w") as out:
            for wt in wts:
                out.write(str(wt) + "\n")
示例6: meta_pca_sne
def meta_pca_sne(exID, experiment_folder): # put exID back
    """Embed an experiment's stored coordinates with t-SNE, plot and persist them.

    Looks the experiment up through ``DatabaseClient``, runs ``bh_sne`` on
    ``experiment['DATA']['TSNE_DATA']``, saves a scatter plot coloured by
    ``epoch + 0.1 * layer`` to ``<experiment_folder>/meta_pca/META`` and
    stores the flattened embedding back under ``experiment['DATA']['META']``.

    Parameters
    ----------
    exID
        Identifier passed to ``DatabaseClient.query``.
    experiment_folder : str
        Folder under which the ``meta_pca`` plot directory is created.

    Returns
    -------
    None
        Returns early (after printing "No results") when the query yields
        ``None``.
    """
    plot_subfolder = experiment_folder + "/meta_pca"
    plot_data_directory = check_create_directory(plot_subfolder)
    filename = "{}/META".format(plot_data_directory)
    # mongo stuff
    dbClient = DatabaseClient()
    filteredResults = dbClient.query(exID)
    if filteredResults is None:
        print("No results")
        return
    filteredId = filteredResults[0]['_id']
    experiment = dbClient.get(filteredId)
    list_of_coords = experiment['DATA']['TSNE_DATA']
    np_list = np.asarray(list_of_coords)
    print("META shape:  %s" % (np_list.shape,))
    epochs = experiment['DATA']['EPOCH']
    layers = experiment['DATA']['LAYER']
    # One colour value per sample: the epoch, offset by a tenth of the layer.
    labels = np.asarray([epochs[i] + (layers[i]*0.1) for i in range(len(epochs))])
    labels = labels[:500]
    # NOTE(review): labels are truncated along samples but np_list along its
    # second axis -- confirm which axis of TSNE_DATA holds the samples.
    np_list = np_list[:,:500]
    perp = 10.0
    no_data_shape = np_list.shape[0]
    # Clamp the perplexity for small inputs. The previous test,
    # ``(perp / 3.0) - 1.0 < no_data_shape``, was true for practically every
    # input, so the default of 10.0 was always replaced by N/3 - 1.
    # Presumably Barnes-Hut t-SNE needs N - 1 >= 3 * perplexity -- confirm
    # against the bh_sne backend.
    max_perp = (no_data_shape / 3.0) - 1.0
    if perp > max_perp:
        perp = max_perp
    sne_co = bh_sne(np_list, perplexity=perp, theta=0.5)
    print("sne %s" % (sne_co.shape,))
    print("labels %s" % (labels,))
    plt.scatter(sne_co[:,0], sne_co[:,1], c=labels)
    plt.savefig(filename, dpi=120)
    plt.close()
    print("show")
    # Store the embedding as one flat Python list.
    flat_coords = np.reshape(sne_co, (1,-1))
    flat_coords = flat_coords.tolist()[0]
    experiment['DATA']['META'] = flat_coords
    dbClient.update(filteredId, experiment)
示例7: perform_tsne_transformation
def perform_tsne_transformation(X):
    """Return the bh_sne embedding (perplexity 5) of ``X``.

    ``X`` is converted to a float64 array and every sample is flattened to a
    single row before being embedded.
    """
    # bh_sne is used instead of scikit-learn's TSNE(n_components=2,
    # random_state=0) because, per the original author, a bug in scikit-learn
    # prevented using it.
    samples = np.asarray(X).astype('float64')
    n_samples = samples.shape[0]
    flattened = samples.reshape((n_samples, -1))
    return bh_sne(flattened, perplexity=5)
示例8: tsne
def tsne(embedding, word_2_id, sample_size=1000):
    """Show a random sample of words at their 2-D t-SNE positions.

    Parameters
    ----------
    embedding : array with an ``astype`` method
        Embedding matrix; cast to float64 and passed to ``bh_sne``.
    word_2_id : dict
        Maps each word to its row index in ``embedding``.
    sample_size : int, optional
        Number of words to annotate. Capped at the vocabulary size so a
        small vocabulary no longer raises ``ValueError``.
    """
    embedding_2d = bh_sne(embedding.astype(np.float64))
    # random.sample needs a sequence: a dict keys view is rejected on
    # Python 3.11+, so materialise the words first.
    words = list(word_2_id)
    keys = random.sample(words, min(sample_size, len(words)))
    fig, ax = plt.subplots()
    for word in keys:
        row = word_2_id[word]  # renamed from ``id`` to stop shadowing the builtin
        ax.annotate(word, (embedding_2d[row, 0], embedding_2d[row, 1]))
    plt.show()
示例9: visualize
def visualize(vecs):
    """Reduce ``vecs`` with bh_sne and plot every reduced point as a marker.

    Parameters
    ----------
    vecs : array-like accepted by ``bh_sne``
        Vectors to embed; one plotted marker per row of the embedding.
    """
    print("Got the vectors, now doing dimesnion reduction...")
    reduced = bh_sne(vecs)
    print("Reduction done, now plotting: ")
    # Plot the t-SNE output. The previous code plotted the first two columns
    # of the *input* vectors and never used ``reduced``.
    for point in reduced:
        plt.plot(point[0], point[1], marker='o', markersize=8)
    plt.show()
开发者ID:bitliner,项目名称:Automatic-Extraction-of-Most-Relevant-Insights-From-Customer-Reviews,代码行数:9,代码来源:visualization.py
示例10: extract_tsne_feat
def extract_tsne_feat():
    """
    Extract tsne features.

    Builds 2-D t-SNE coordinates from per-column TF-IDF matrices of
    ``tmp2/df_w2vlem_join.csv`` ('t_w2v', 't_w2v' stacked with 'q_w2v', and
    'd_w2v') and writes the combined table to ``tmp2/df_tsne_feats.csv``.
    Intermediate tables go to ``tmp2/tsne_t`` and ``tmp2/tsne_qt``.

    Note: python2 only.
    """
    def _embed_2d(matrix, n_components):
        # TruncatedSVD -> standardise -> Barnes-Hut t-SNE.
        reducer = TruncatedSVD(n_components=n_components, random_state=2016)
        reduced = reducer.fit_transform(matrix)
        standardized = StandardScaler().fit_transform(reduced)
        return bh_sne(standardized)

    source = pd.read_csv('tmp2/df_w2vlem_join.csv', index_col=0)
    n_rows = len(source)
    feats = pd.DataFrame(index=source.index.values)

    # 't_w2v' column alone.
    title_tfidf = TfidfVectorizer(ngram_range=(1,4), stop_words='english', min_df=2)
    X_title = title_tfidf.fit_transform(source['t_w2v'].tolist())
    coords = _embed_2d(X_title, 100)
    feats['tsne_t_1'] = coords[:n_rows, 0]
    feats['tsne_t_2'] = coords[:n_rows, 1]
    feats.to_csv('tmp2/tsne_t', index=False)
    print(feats)

    # 't_w2v' and 'q_w2v' TF-IDF matrices stacked side by side.
    query_tfidf = TfidfVectorizer(ngram_range=(1,4), stop_words='english', min_df=2)
    X_query = query_tfidf.fit_transform(source['q_w2v'].tolist())
    X_title_query = sp.hstack([X_title, X_query]).tocsr()
    coords = _embed_2d(X_title_query, 100)
    feats['tsne_qt_1'] = coords[:n_rows, 0]
    feats['tsne_qt_2'] = coords[:n_rows, 1]
    feats.to_csv('tmp2/tsne_qt', index=False)
    # Continue from the table as it was written to disk.
    feats = pd.read_csv('tmp2/tsne_qt')
    print(feats)

    # 'd_w2v' column alone.
    desc_tfidf = TfidfVectorizer(ngram_range=(1,3), stop_words='english', min_df=2)
    X_desc = desc_tfidf.fit_transform(source['d_w2v'].tolist())
    coords = _embed_2d(X_desc, 70)
    feats['tsne_desc_1'] = coords[:n_rows, 0]
    feats['tsne_desc_2'] = coords[:n_rows, 1]
    feats.to_csv('tmp2/df_tsne_feats.csv')
示例11: test_iris
def test_iris():
    """Smoke test: bh_sne runs on the iris feature matrix without raising."""
    from tsne import bh_sne
    from sklearn.datasets import load_iris

    dataset = load_iris()
    features, targets = dataset.data, dataset.target  # targets are not used
    embedded = bh_sne(features)
示例12: _tsne
def _tsne(X, dir_str="*.wav", perplexity=3, plotting=False):
    """
    Utility function to compute tsne

    Returns the ``bh_sne`` embedding of ``X``. When ``plotting`` is true the
    points are also drawn as red dots, each labelled with the file name at
    the same position in the sorted ``glob`` listing of ``dir_str``.
    """
    file_names = sorted(glob.glob(dir_str))
    embedded = bh_sne(X, perplexity=perplexity)
    if not plotting:
        return embedded
    figure()
    plot(embedded[:,0], embedded[:,1],'r.')
    # The i-th point is labelled with the i-th file name.
    for idx, point in enumerate(embedded):
        text(point[0], point[1], '%s' % file_names[idx], fontsize=12)
    return embedded
示例13: visualize_tsne
def visualize_tsne():
    """
    play around with tsne to visualize image space

    Walks ``/Volumes/micro/recommend-a-graham/imgs/<category>/<user>/``,
    joins the rows of ``./tracker.pkl`` whose ``img_id`` is in each user
    folder with that user's ``../fc7_pkls/fc7_<user>.pkl`` frame on
    'shortcode', stacks the 'fc7' vectors, embeds them with ``bh_sne`` and
    shows a scatter plot coloured by username.
    """
    import matplotlib.pyplot as plt
    from tsne import bh_sne
    tracker_df = pd.read_pickle('./tracker.pkl')
    dfs = []
    for category in listdir('/Volumes/micro/recommend-a-graham/imgs/'):
        for user in listdir('/Volumes/micro/recommend-a-graham/imgs/'+category):
            # Sets give O(1) membership tests; the previous code scanned a
            # list / ndarray once per row.
            img_ids = set(listdir('/Volumes/micro/recommend-a-graham/imgs/{}/{}/'.format(category, user)))
            sub_df = tracker_df[tracker_df.img_id.apply(lambda x: x in img_ids)]
            # fc7 features are used; an fc8 variant ('../fc8_pkls/fc8_{}.pkl')
            # was previously tried.
            user_df = pd.read_pickle('../fc7_pkls/fc7_{}.pkl'.format(user))
            shortcodes = set(sub_df.shortcode.values)
            user_df = user_df[user_df.shortcode.apply(lambda x: x in shortcodes)]
            dfs.append(pd.merge(sub_df, user_df, on='shortcode'))
    dfs = pd.concat(dfs, axis=0)
    dfs.reset_index(inplace=True)
    # Turn every feature vector into a single row of shape (1, d) ...
    dfs.fc7 = dfs.fc7.apply(lambda x: x.reshape(1, x.shape[0]))
    # ... and stack them in one call instead of concatenating inside a loop,
    # which copied the growing array on every iteration.
    x_data = np.concatenate(list(dfs.fc7.values), axis=0)
    print(x_data.shape)
    usernames = dfs.username.unique()
    y_dict = {k:i for i,k in enumerate(usernames)}
    y_data = dfs.username.apply(lambda x: y_dict[x]).values
    vis_data = bh_sne(x_data)
    vis_x = vis_data[:,0]
    vis_y = vis_data[:,1]
    # NOTE(review): 28 and 29 are hard-coded; presumably 28 is the number of
    # distinct users (cf. the plot title) -- confirm before reusing.
    plt.scatter(vis_x, vis_y, c=y_data, cmap=plt.cm.get_cmap("jet", 28))
    cbar = plt.colorbar()
    cbar.set_ticks([i*29./28 + 29./56 for i in range(28)])
    # list(...) so that Python 3 passes labels rather than a zip iterator.
    cbar.set_ticklabels(list(zip(usernames, [user_cat_dict[i] for i in usernames])))
    plt.clim(0, 29)
    plt.title('tsne, fc7, 100img_per_user, 4user_per_categ')
    plt.show()
示例14: run
def run(self):
    """Embed the sample feature vectors with bh_sne and write them as TSV.

    Reads the vectors from the "vecs_with_id" sample file (restricted to
    ``getSampleIds()``), runs ``bh_sne`` with ``pca_d=None`` and the
    configured ``tsne_theta``, and writes ("index", "x", "y") columns to
    the "article_embedding" sample file.
    """
    config = Config.get()

    # Create the embedding.
    vectors_path = config.getSample("ExternalFiles", "vecs_with_id")
    feature_dict = Utils.read_features(vectors_path, id_set=getSampleIds())
    keys = list(feature_dict.keys())
    vectors = np.array([feature_dict[key]["vector"] for key in keys])

    theta = config.getfloat("PreprocessingConstants", "tsne_theta")
    embedded = bh_sne(vectors, pca_d=None, theta=theta)

    xs = list(embedded[:, 0])
    ys = list(embedded[:, 1])
    output_path = config.getSample("ExternalFiles", "article_embedding")
    Utils.write_tsv(output_path, ("index", "x", "y"), keys, xs, ys)
示例15: extract_w2v_tsne_feat
def extract_w2v_tsne_feat():
    """
    Extract w2v tsne features.

    Standardises every column of ``tmp2/df_w2v_feats.csv``, embeds the rows
    with ``bh_sne`` and writes the two coordinates, indexed like the input,
    to ``tmp2/df_tsne_w2v_feats.csv``.

    Note: python2 only. Worst in cv, so do not use this.
    """
    source = pd.read_csv('tmp2/df_w2v_feats.csv', index_col=0)
    n_rows = len(source)
    standardized = StandardScaler().fit_transform(source.values)
    coords = bh_sne(standardized)
    feats = pd.DataFrame(index=source.index.values)
    feats['tsne_t_1'] = coords[:n_rows, 0]
    feats['tsne_t_2'] = coords[:n_rows, 1]
    feats.to_csv('tmp2/df_tsne_w2v_feats.csv')