This article collects typical usage examples of the PCA.tolist method from Python's sklearn.decomposition. If you are wondering what exactly Python's PCA.tolist does, how to use it, or where to find examples of it in practice, the hand-picked code samples below may help. You can also explore further usage examples of the class it belongs to, sklearn.decomposition.PCA.
A total of 7 code examples of the PCA.tolist method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
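Note that tolist is not defined on the PCA estimator itself: PCA.fit_transform returns a numpy.ndarray, and .tolist() is the ndarray method that converts that array into nested Python lists (handy for JSON serialization or database storage, as several examples below do). A minimal sketch of the pattern with made-up data:

import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(10, 5)        # made-up data: 10 samples, 5 features
X_pca = PCA(n_components=2).fit_transform(X)    # numpy.ndarray of shape (10, 2)
coords = X_pca.tolist()                         # plain nested lists of [x, y] pairs
print(coords[0])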
Example 1: dim_survey
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def dim_survey(X, entry_id):
    # convert to numpy
    X = np.array(X)

    # run the reductions.
    X_pca = PCA(n_components=3).fit_transform(X)
    X_tsne = TSNE(n_components=3).fit_transform(X)
    X_ica = FastICA(n_components=3).fit_transform(X)

    # connect to the db.
    with mongoctx() as db:

        # store the reduced coordinates as plain lists.
        db['entry'].update(
            {
                '_id': ObjectId(entry_id)
            },
            {
                '$set': {
                    'pca': X_pca.tolist(),
                    'tsne': X_tsne.tolist(),
                    'ica': X_ica.tolist(),
                }
            }
        )
Example 2: preprocessing
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def preprocessing(self):
    tmpFile = self.saveTempFile(self._traces, self._labels)
    X, Y = self.getArray(tmpFile.name)
    os.unlink(tmpFile.name)

    # standardize the features, then project with PCA
    X_scaled = preprocessing.scale(X)
    X_pca = PCA().fit_transform(X_scaled)

    self._tracesSet = X_pca.tolist()
    self._tracesLabel = Y
Example 3: testKMeansLearner
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def testKMeansLearner(ds, learner):
    '''Takes a kmeans learner and test data and
    runs cross validation.'''
    # make a copy of the original ds
    pca_ds = copy.deepcopy(ds)
    if learner == createPCAKMeansLearner:
        examples = [row[:-1] for row in pca_ds.examples]
        norm_examples = scale(examples).tolist()
        reduced_data = PCA(n_components=2).fit_transform(norm_examples)
        # re-attach each row's original label to its 2-D PCA projection
        reduced_data_with_output = [reduced_data.tolist()[i] + [row[-1]]
                                    for i, row in enumerate(pca_ds.examples)]
        pca_ds.examples = reduced_data_with_output
        pca_ds.target = 2
        pca_ds.inputs = range(0, 2)
    return cross_validation(learner, pca_ds)
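The list comprehension in the example above pairs each sample's 2-D PCA coordinates with its original label. A standalone sketch of that pattern, using made-up data and plain lists rather than the dataset object assumed above, might look like this:

from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

# made-up examples: last column is the class label, the rest are features
examples = [[5.1, 3.5, 1.4, 0],
            [4.9, 3.0, 1.3, 0],
            [6.7, 3.1, 4.7, 1],
            [6.3, 2.5, 5.0, 1]]

features = [row[:-1] for row in examples]
reduced = PCA(n_components=2).fit_transform(scale(features)).tolist()

# re-attach each original label to its 2-D projection
reduced_with_labels = [coords + [row[-1]] for coords, row in zip(reduced, examples)]
print(reduced_with_labels)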
Example 4: search
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def search():
    b_start = time.time()
    searches = request.args.getlist('filename', None)
    level = int(request.args.get('level', 19))
    region = request.args.get('region', 'Allegheny')
    limit = int(request.args.get('limit', 25))
    if limit > 100:
        limit = 100
    for i, s in enumerate(searches):
        if not s.endswith('.png'):
            searches[i] = searches[i] + '.png'

    # t-SNE params
    perplexity = float(request.args.get('perplexity', 30.0))
    early_exaggeration = float(request.args.get('early_x', 4.0))
    learning_rate = float(request.args.get('learning_rate', 1000.0))
    metric = request.args.get('metric', 'euclidean')
    to_pca = bool(request.args.get('pca', True))
    pca_only = bool(request.args.get('pca_only', True))

    matches = []
    res = {}
    print searches

    region_level = (region, level)
    filenames = all_filenames[region_level]
    tile_dict = tile_dicts[region_level]
    features = all_features[region_level]
    neighbours = knn_trees[region_level]
    features_filename = features_filenames[region_level]

    if searches is not None:
        t_start = time.time()
        # make searches a singleton list if it isn't already a list
        searches = maybe_wrap(searches)
        search_features = []
        for search in searches:
            try:
                filename_index = tile_dict[search]
                search_feature = features[filename_index]
                search_features.append(search_feature)
            except KeyError:
                pass

        # query with the centroid of the given search features
        if len(search_features) > 0:
            search_features = [np.mean(search_features, axis=0)]
            m = time.time()
            distances, indices = neighbours.kneighbors(search_features, limit)
            print 'Ball tree request time: ' + str(time.time() - m)
            similar_features = [features[i] for i in indices[0]]
            if pca_only:
                reduced_dim = PCA(n_components=2).fit_transform(similar_features)
            else:
                reduced_dim = FastICA(n_components=2).fit_transform(similar_features)
            reduced_dim = reduced_dim / np.max(np.abs(reduced_dim))
            # key is 'tsne_pos' because the server code still expects 'tsne_pos' as the key,
            # even though the dimensionality reduction is now done with PCA
            matches = [{'distance': dist, 'filename': filenames[i], 'tsne_pos': tuple(tsne_pos)}
                       for dist, i, tsne_pos in zip(distances[0].tolist(), indices[0].tolist(), reduced_dim.tolist())]
            t_duration = time.time() - t_start
            print 'Total duration: ' + str(t_duration)
            res = {'duration': t_duration,
                   'features_file': features_filename,
                   'matches': matches,
                   }
        else:
            res = {'error': 'tile not found'}
    else:
        res = {'error': 'filename param missing'}
    return json.dumps(res)
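For reference, the endpoint above averages the feature vectors of the requested tiles into one query point, asks a prebuilt nearest-neighbour index for the closest matches, and only then projects the matches to 2-D. A minimal sketch of that lookup pattern, with a made-up feature matrix and sklearn's NearestNeighbors standing in for the precomputed knn_trees index assumed above:

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

features = np.random.RandomState(0).rand(50, 8)            # made-up tile feature vectors
index = NearestNeighbors(algorithm='ball_tree').fit(features)

query = [np.mean(features[[3, 7, 11]], axis=0)]            # centroid of a few query tiles
distances, indices = index.kneighbors(query, n_neighbors=5)

# project only the matched tiles to 2-D and convert everything to plain lists for JSON
coords = PCA(n_components=2).fit_transform(features[indices[0]]).tolist()
matches = list(zip(distances[0].tolist(), indices[0].tolist(), coords))
print(matches)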
Example 5: getKMeansVoronoi
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def getKMeansVoronoi(matrix, k, max_iter, initMethod, n_init, tolerance, metric_dist, filenames):
    """
    Generates an array of cluster indices for the active files, a list of points for the
    centroids, and a list of points for the chunks.

    Args:
        NumberOnlymatrix: a numpy matrix without file names and words
        matrix: a python matrix representing the counts of words in files
        k: int, k-value
        max_iter: int, maximum number of iterations
        initMethod: str, method of initialization: 'k++' or 'random'
        n_init: int, number of iterations with different centroids
        tolerance: float, relative tolerance with respect to inertia to declare convergence
        metric_dist: str, method of the distance metric
        filenames: list of active files

    Returns:
        bestIndex: an array of the cluster index for each sample
        siltteScore: float, silhouette score
        colorChart: string of rgb tuples
        finalPointsList: list of xy coords for each chunk
        finalCentroidsList: list of xy coords for each centroid
        textData: dictionary of labels, xcoord, and ycoord
        maxVal: the maximum x or y value, used to set bounds in javascript
    """
    NumberOnlymatrix = matrix.tolist()

    # xy coordinates for each chunk
    reduced_data = PCA(n_components=2).fit_transform(matrix)

    # n_init statically set to 300 for now. Probably should be determined based on the number of active files
    kmeans = KMeans(init=initMethod, n_clusters=k, n_init=n_init, tol=tolerance, max_iter=max_iter)
    kmeansIndex = kmeans.fit_predict(reduced_data)
    bestIndex = kmeansIndex.tolist()
    fullCoordList = reduced_data.tolist()

    # make an array centroidGroups whose elements are the coords that belong to each centroid
    i = 1
    seen = [bestIndex[0]]
    centroidGroups = [[] for _ in range(k)]  # make a list of k lists, one for each cluster
    centroidGroups[bestIndex[0]].append((fullCoordList[0]))  # group the coords based on their cluster number
    while i < len(bestIndex):
        if bestIndex[i] in seen:
            centroidGroups[bestIndex[i]].append(fullCoordList[i])
            i += 1
        else:
            seen.append(bestIndex[i])
            centroidGroups[bestIndex[i]].append(fullCoordList[i])
            i += 1

    # separate the x and y coordinates to calculate the centroid
    xsList = []
    ysList = []
    for i in xrange(0, len(centroidGroups)):
        tempXcoordList = []
        tempYcoordList = []
        for j in xrange(0, len(centroidGroups[i])):
            tempXcoord = centroidGroups[i][j][0]
            tempXcoordList.append(tempXcoord)
            tempYcoord = centroidGroups[i][j][1]
            tempYcoordList.append(tempYcoord)
        xsList.append(tempXcoordList)
        ysList.append(tempYcoordList)

    # calculate the coordinates for the centroid
    centroidCoords = []
    for i in xrange(0, len(xsList)):
        if len(xsList[i]) == 1:
            temp1 = xsList[i][0]  # each element in xsList is a list, but we need an int
            temp2 = ysList[i][0]  # each element in ysList is a list, but we need an int
            centroidCoords.append([temp1, temp2])
        else:
            centroidCoord = centroid(xsList[i], ysList[i])
            centroidCoords.append(centroidCoord)

    xs, ys = reduced_data[:, 0], reduced_data[:, 1]
    origXs = xs.tolist()
    origYs = ys.tolist()
    # looks the same as above, but necessary because neither can be manipulated more than once
    xs = xs.tolist()
    ys = ys.tolist()

    # translate every coordinate to positive, as svg starts at the top left with coordinate (0,0)
    transX = abs(min(xs)) + 100
    transY = abs(min(ys)) + 100
    transXs, transYs = translateCoordsToPositive(origXs, origYs, transX, transY)

    # find the max coordinate to help determine the width (D3)
    maxX = max(transXs)
    maxY = max(transYs)
    maxList = [maxX, maxY]
    maxVal = max(maxList)
#......... part of the code omitted here .........
Example 6: getKMeans
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def getKMeans(NumberOnlymatrix, matrix, k, max_iter, initMethod, n_init, tolerance, DocTermSparseMatrix, metric_dist):
    """
    Generates an array of cluster indices for the active files.

    Args:
        NumberOnlymatrix: a matrix without file names and words
        matrix: a matrix representing the counts of words in files
        k: int, k-value
        max_iter: int, maximum number of iterations
        initMethod: str, method of initialization: 'k++' or 'random'
        n_init: int, number of iterations with different centroids
        tolerance: float, relative tolerance with respect to inertia to declare convergence
        DocTermSparseMatrix: sparse matrix of the word counts
        metric_dist: str, method of the distance metric

    Returns:
        bestIndex: a list of the cluster index for each sample
        siltteScore: float, silhouette score
    """

    """Parameters for KMeans (sklearn)"""
    # n_clusters: int, optional, default: 8
    #             namely K; the number of clusters to form OR the number of centroids to generate
    # max_iter: int
    #           maximum number of iterations of the k-means algorithm for a single run
    # n_init: int, optional, default: 10
    #         number of times the k-means algorithm will be run with different centroid seeds
    # init: 'k-means++', 'random' or an ndarray
    #       method for initialization;
    #       'k-means++' selects initial cluster centers for k-means clustering in a smart way to speed up convergence
    # precompute_distances: boolean
    # tol: float, optional, default: 1e-4
    #      relative tolerance w.r.t. inertia to declare convergence
    # n_jobs: int
    #         the number of jobs to use for the computation
    #         -1: all CPUs are used
    #          1: no parallel computing code is used at all; useful for debugging
    #         for n_jobs below -1, (n_cpus + 1 + n_jobs) CPUs are used
    #         -2: all CPUs but one are used

    inequality = '≤'.decode('utf-8')

    # trap bad silhouette score input

    # convert from the sparse matrix
    data = DocTermSparseMatrix.toarray()

    # coordinates for each cluster
    reduced_data = PCA(n_components=2).fit_transform(data)
    coordList = reduced_data.tolist()

    # run fit_predict 300 times and keep the most common combination to account for variation
    combosDict = {}
    for i in xrange(0, 300):
        kmeans = KMeans(init=initMethod, n_clusters=k, n_init=n_init)
        kmeansIndex = kmeans.fit_predict(reduced_data)
        item = kmeansIndex.tolist()
        combo = ' '.join(str(x) for x in item)
        if combo in combosDict:
            combosDict[combo] += 1
        else:
            combosDict[combo] = 1

    values = list(combosDict.values())
    keys = list(combosDict.keys())
    bestKey = keys[values.index(max(values))]
    stringIndex = bestKey.split()
    bestIndex = []
    for x in stringIndex:
        bestIndex.append(int(x))

    if k <= 2:
        siltteScore = "N/A [Not available for K " + inequality + " 2]"
    elif k > (matrix.shape[0] - 1):
        siltteScore = 'N/A [Not available if (K value) > (number of active files - 1)]'
    else:
        kmeans.fit(NumberOnlymatrix)
        labels = kmeans.labels_  # for silhouette score
        siltteScore = getSiloutteOnKMeans(labels, matrix, metric_dist)

    # bestIndex[i] is the index of the centroid the i'th observation is closest to
    return bestIndex, siltteScore
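The voting loop in the example above encodes each run's labels as a space-joined string and keeps the most frequent labelling. A compact sketch of that same idea (assumed, not taken from the original repository) using collections.Counter:

import numpy as np
from collections import Counter
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

data = np.random.RandomState(0).rand(30, 6)       # made-up document-term counts
reduced = PCA(n_components=2).fit_transform(data)

votes = Counter()
for _ in range(300):
    labels = KMeans(init='k-means++', n_clusters=3, n_init=10).fit_predict(reduced)
    votes[' '.join(str(x) for x in labels.tolist())] += 1

bestIndex = [int(x) for x in votes.most_common(1)[0][0].split()]
print(bestIndex)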
Example 7: plotPCAKMeansLearner
# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def plotPCAKMeansLearner(ds, num_clusters=10):
    '''Takes a kmeans learner and ds, and outputs
    a Voronoi diagram with each input point plotted.'''
    # dimension-reduction steps
    examples = [row[:-1] for row in ds.examples]
    norm_examples = scale(examples).tolist()
    reduced_data = PCA(n_components=2).fit_transform(norm_examples)
    reduced_data_with_output = [reduced_data.tolist()[i] + [row[-1]] for i, row in enumerate(ds.examples)]

    # instantiate and fit the kmeans object
    pca_kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
    pca_kmeans.fit(reduced_data)

    # get the boundaries of our grid
    x_min, x_max = reduced_data[:, 0].min() + 1, reduced_data[:, 0].max() - 1
    y_min, y_max = reduced_data[:, 1].min() + 1, reduced_data[:, 1].max() - 1

    # create our background grid
    plot = pl.subplot(111)
    plot.patch.set_facecolor('black')
    pl.xlim([x_min, x_max])
    pl.ylim([y_min, y_max])

    # create a dictionary mapping each cluster to its list of images
    img_dict = defaultdict(list)
    for i, row in enumerate(ds.examples):
        # get the cluster
        cluster = pca_kmeans.labels_[i]
        # get the img and img coordinates
        coordinates = reduced_data[i]
        img = np.array(ds.examples[i][:-1]).reshape((16, 16))
        # for each cluster, append the img to its value
        img_dict[cluster].append((img, coordinates))

    color_map = {0: [1.0, 0.0, 0.0],
                 1: [0.93, 0.57, 0.13],
                 2: [1.0, 0.8431, 0.0],
                 3: [.1961, .8039, .1961],
                 4: [0, 1.0, 0],
                 5: [0.0, 0.8078, .8196],
                 6: [.098, .098, .4392],
                 7: [.6275, .1255, .9412],
                 8: [1.0, .4118, .7059],
                 9: [1.0, 1.0, 1.0]}

    # plot a subset of images for each cluster
    for cluster, v in img_dict.items():
        # sample 10 images
        img_list = random.sample(v, 10)
        # plot each subset
        for i, img_and_coord in enumerate(v):
            if i < 10:
                color_img = img_list[i][0].tolist()
                # change colors
                for j, row in enumerate(color_img):
                    for k, val in enumerate(row):
                        if val == 1.0:
                            color_img[j][k] = color_map[cluster]
                        else:
                            color_img[j][k] = [0.0, 0.0, 0.0]
                osb_img = osb.OffsetImage(np.array(color_img), zoom=1.5)
                xy = img_list[i][1].tolist()
                ab = osb.AnnotationBbox(osb_img, xy, pad=0, xycoords='data')
                plot.add_artist(ab)

    # draw and show the plot
    pl.draw()
    pl.show()