

Python PCA.tolist Method Code Examples

This article collects typical code examples of the sklearn.decomposition.PCA.tolist method in Python. If you are wondering how PCA.tolist works, how to call it, or what real uses of it look like, the selected examples below should help. You can also explore further usage examples of sklearn.decomposition.PCA, the class these snippets are built around.


A total of 7 code examples of the PCA.tolist method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
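
Strictly speaking, tolist() is a method of the NumPy ndarray, not of the PCA estimator itself: in every example below it is called on the array returned by PCA(...).fit_transform(...). The following minimal sketch (toy data, not taken from any of the projects below) shows that basic pattern, typically used to make the reduced coordinates JSON-serializable:

import json
import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(10, 5)        # toy data: 10 samples, 5 features
X_pca = PCA(n_components=2).fit_transform(X)    # NumPy ndarray of shape (10, 2)
coords = X_pca.tolist()                         # plain nested Python lists
print(json.dumps(coords))                       # now JSON-serializable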

Example 1: dim_survey

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def dim_survey(X, entry_id):

    # convert to numpy
    X = np.array(X)

    # run the reduction.
    X_pca = PCA(n_components=3).fit_transform(X)
    X_tsne = TSNE(n_components=3).fit_transform(X)
    X_ica = FastICA(n_components=3).fit_transform(X)

    # connect to db.
    with mongoctx() as db:

        # update the stuff.
        db['entry'].update(
            {
                '_id': ObjectId(entry_id)
            },
            {
                '$set': {
                    'pca': X_pca.tolist(),
                    'tsne': X_tsne.tolist(),
                    'ica': X_ica.tolist(),
                }
            }
        )
Developer: jim-bo, Project: dimwit, Lines of code: 28, Source: tasks.py
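
A note on the design choice in Example 1 (the snippet below is a sketch with made-up data, not code from the dimwit project): neither the standard json module nor PyMongo's BSON encoder accepts NumPy arrays, which is why the reduced coordinates are converted with tolist() before being written into the 'entry' document.

import json
import numpy as np

coords = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])  # stand-in for X_pca
try:
    json.dumps(coords)               # raises TypeError: ndarray is not JSON serializable
except TypeError:
    pass
print(json.dumps(coords.tolist()))   # works once converted to nested lists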

Example 2: preprocessing

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
    def preprocessing(self):
        tmpFile = self.saveTempFile( self._traces, self._labels )

        X, Y = self.getArray(tmpFile.name)

        os.unlink(tmpFile.name)

        X_scaled = preprocessing.scale(X)
        X_pca = PCA().fit_transform(X_scaled)
        
        self._tracesSet   = X_pca.tolist()
        self._tracesLabel = Y
Developer: louisalflame, Project: pythonSvm, Lines of code: 14, Source: tracePredict.py
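
The same scale-then-project idea can be sketched with a scikit-learn Pipeline (toy data assumed; this is not part of the pythonSvm project). As in the snippet above, PCA() with no n_components keeps all components.

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

X = np.random.RandomState(1).rand(20, 4)                         # toy traces
X_pca = make_pipeline(StandardScaler(), PCA()).fit_transform(X)  # scale, then project
traces_set = X_pca.tolist()                                      # list of lists for the later SVM step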

Example 3: testKMeansLearner

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def testKMeansLearner(ds,learner):
    ''' Takes a kmeans learner and test data and 
        runs cross validation '''

    #make copy of original ds
    pca_ds = copy.deepcopy(ds)

    if learner == createPCAKMeansLearner:
        
        examples = [row[:-1] for row in pca_ds.examples]
        norm_examples = scale(examples).tolist()    
        reduced_data = PCA(n_components=2).fit_transform(norm_examples)

        reduced_data_with_output = [reduced_data.tolist()[i]+[row[-1]] for i,row in enumerate(pca_ds.examples)]        

        pca_ds.examples = reduced_data_with_output
        pca_ds.target = 2
        pca_ds.inputs = range(0,2)

    return cross_validation(learner, pca_ds) 
Developer: evancasey, Project: cs158-machine-learning, Lines of code: 22, Source: assignment4.py
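
Reattaching the label column after the reduction can also be done directly in NumPy; the sketch below uses assumed toy data and np.column_stack instead of the list comprehension in the assignment code.

import numpy as np
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA

examples = np.random.RandomState(2).rand(15, 6)   # toy rows; the last column plays the role of the label
labels = examples[:, -1]
reduced = PCA(n_components=2).fit_transform(scale(examples[:, :-1]))
reduced_with_output = np.column_stack([reduced, labels]).tolist()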

Example 4: search

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def search():
    b_start = time.time()
    
    searches = request.args.getlist('filename', None)
    level = int(request.args.get('level', 19))
    region = request.args.get('region', 'Allegheny')
    limit = int(request.args.get('limit', 25))
    if limit > 100:
        limit = 100

    for i, s in enumerate(searches):
        if not s.endswith('.png'):
            searches[i] = searches[i] + '.png'
    
    #tsne params
    perplexity = float(request.args.get('perplexity', 30.0))
    early_exaggeration = float(request.args.get('early_x', 4.0))
    learning_rate = float(request.args.get('learning_rate', 1000.0))
    metric = request.args.get('metric', 'euclidean')
    to_pca = bool(request.args.get('pca', True))
    pca_only = bool(request.args.get('pca_only', True))
    
    matches = []
    res = {}
    print searches
    
    region_level = (region, level)
    
    filenames = all_filenames[region_level]
    tile_dict = tile_dicts[region_level]
    features = all_features[region_level]
    neighbours = knn_trees[region_level]
    features_filename = features_filenames[region_level]
    if searches is not None:
        
        t_start = time.time()
        
        # makes searches into a singleton list if it isn't already a list
        searches = maybe_wrap(searches)
        
        search_features = []
        for search in searches:
            try:
                filename_index = tile_dict[search]
                search_feature = features[filename_index]
                search_features.append(search_feature)
            except KeyError:
                pass
        
        # centroid of search_features given
        
        if len(search_features) > 0:
            search_features = [np.mean(search_features, axis=0)]
            m = time.time()
            distances, indices = neighbours.kneighbors(search_features, limit)
            print 'Ball tree request time: ' + str(time.time() - m)

            similar_features = [features[i] for i in indices[0]]
                        
            if pca_only:
                reduced_dim = PCA(n_components=2).fit_transform(similar_features)
            else:
                reduced_dim = FastICA(n_components=2).fit_transform(similar_features)
            
            reduced_dim = reduced_dim/np.max(np.abs(reduced_dim))
            
            # key is 'tsne_pos' as server code still expects 'tsne_pos' as the key
            # even though dim. reduction is done with PCA now
            matches = [{'distance':dist, 'filename':filenames[i], 'tsne_pos':tuple(tsne_pos)} \
                       for dist, i, tsne_pos in zip(distances[0].tolist(), indices[0].tolist(), reduced_dim.tolist())]
            
            t_duration = time.time() - t_start
            print 'Total duration: ' + str(t_duration)
            res = {'duration' : t_duration,
                   'features_file': features_filename,
                   'matches': matches,
                   }
        else:
            res = {'error':'tile not found'}
    else:
        res = {'error':'filename param missing'}
    return json.dumps(res)
Developer: 3660628, Project: terrapattern, Lines of code: 84, Source: server.py
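
The query pattern above boils down to a short sketch (toy data and parameter values are assumptions, not the terrapattern setup): average the feature vectors of the searched tiles, ask a fitted NearestNeighbors index for the closest tiles, then project the matches to 2-D with PCA and normalize for display.

import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

features = np.random.RandomState(3).rand(200, 64)               # one feature row per tile
nn = NearestNeighbors(n_neighbors=25).fit(features)

query = np.mean(features[[5, 17, 42]], axis=0).reshape(1, -1)   # centroid of the searched tiles
distances, indices = nn.kneighbors(query, n_neighbors=25)

similar = features[indices[0]]
positions = PCA(n_components=2).fit_transform(similar)
positions = positions / np.max(np.abs(positions))               # scale into [-1, 1] for the client
matches = [{'distance': d, 'index': i, 'pos': tuple(p)}
           for d, i, p in zip(distances[0].tolist(), indices[0].tolist(), positions.tolist())]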

Example 5: getKMeansVoronoi

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def getKMeansVoronoi(matrix, k, max_iter, initMethod, n_init, tolerance, metric_dist, filenames):
    """
    Generate an array of centroid indices based on the active files, a list of points for the centroids, and a list
    of points for the chunks.

    Args:
        NumberOnlymatrix: a numpy matrix without file names and words
        matrix: a python matrix representing the counts of words in files
        k: int, k-value
        max_iter: int, maximum number of iterations
        initMethod: str, method of initialization: 'k++' or 'random'
        n_init: int, number of iterations with different centroids
        tolerance: float, relative tolerance with regard to inertia to declare convergence
        metric_dist: str, method of the distance metrics
        filenames: list of active files


    Returns:
        bestIndex: an array of the cluster index for each sample 
        siltteScore: float, silhouette score
        colorChart: string of rgb tuples  
        finalPointsList: list of xy coords for each chunk 
        finalCentroidsList: list of xy coords for each centroid 
        textData: dictionary of labels, xcoord, and ycoord
        maxVal: the maximum x or y value used to set bounds in javascript
    """

    NumberOnlymatrix = matrix.tolist()

    # xy coordinates for each chunk
    reduced_data = PCA(n_components=2).fit_transform(matrix)

    # n_init statically set to 300 for now. Probably should be determined based on number of active files
    kmeans = KMeans(init=initMethod, n_clusters=k, n_init=n_init, tol=tolerance, max_iter=max_iter)
    kmeansIndex = kmeans.fit_predict(reduced_data)
    bestIndex = kmeansIndex.tolist()
    fullCoordList = reduced_data.tolist()

    # make an array centroidGroups whose elements are the coords that belong to each centroid
    i = 1
    seen = [bestIndex[0]]
    centroidGroups = [[] for _ in range(k)]  # make a list of k lists, one for each cluster
    centroidGroups[bestIndex[0]].append((fullCoordList[0]))  # Group the centroids based on their cluster number

    while i < len(bestIndex):
        if bestIndex[i] in seen:
            centroidGroups[bestIndex[i]].append(fullCoordList[i])
            i += 1
        else:
            seen.append(bestIndex[i])
            centroidGroups[bestIndex[i]].append(fullCoordList[i])
            i += 1

    # Separate the x and y coordinates to calculate the centroid
    xsList = []
    ysList = []
    for i in xrange(0, len(centroidGroups)):
        tempXcoordList = []
        tempYcoordList = []
        for j in xrange(0, len(centroidGroups[i])):
            tempXcoord = centroidGroups[i][j][0]
            tempXcoordList.append(tempXcoord)
            tempYcoord = centroidGroups[i][j][1]
            tempYcoordList.append(tempYcoord)
        xsList.append(tempXcoordList)
        ysList.append(tempYcoordList)

    # calculate the coordinates for the centroid
    centroidCoords = []
    for i in xrange(0, len(xsList)):
        if len(xsList[i]) == 1:
            temp1 = xsList[i][0]  # each element in xslist is a list, but we need an int
            temp2 = ysList[i][0]  # each element in yslist is a list, but we need an int
            centroidCoords.append([temp1, temp2])
        else:
            centroidCoord = centroid(xsList[i], ysList[i])
            centroidCoords.append(centroidCoord)

    xs, ys = reduced_data[:, 0], reduced_data[:, 1]

    origXs = xs.tolist()
    origYs = ys.tolist()

    # Looks the same as above but necessary because neither can be manipulated more than once
    xs = xs.tolist()
    ys = ys.tolist()

    # Translate every coordinate to positive as svg starts at top left with coordinate (0,0)
    transX = abs(min(xs)) + 100
    transY = abs(min(ys)) + 100

    transXs, transYs = translateCoordsToPositive(origXs, origYs, transX, transY)

    # Find the max coordinate to help determine the width (D3)
    maxX = max(transXs)
    maxY = max(transYs)

    maxList = [maxX, maxY]

    maxVal = max(maxList)
#.........the rest of this code is omitted here.........
Developer: TheLady, Project: Lexos, Lines of code: 103, Source: KMeans.py
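
The centroid bookkeeping in getKMeansVoronoi can be sketched far more compactly with NumPy boolean masks (toy data below, not the Lexos document-term matrix): group the 2-D PCA coordinates by their KMeans label and take the per-cluster mean.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

matrix = np.random.RandomState(4).rand(30, 10)           # toy counts matrix
k = 3
reduced_data = PCA(n_components=2).fit_transform(matrix)
best_index = KMeans(n_clusters=k, n_init=10).fit_predict(reduced_data)

full_coord_list = reduced_data.tolist()                   # xy coords for each chunk
centroid_coords = [reduced_data[best_index == c].mean(axis=0).tolist() for c in range(k)]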

Example 6: getKMeans

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def getKMeans(NumberOnlymatrix, matrix, k, max_iter, initMethod, n_init, tolerance, DocTermSparseMatrix, metric_dist):
    """
    Generate an array of centroid indices based on the active files.

    Args:
        NumberOnlymatrix: a matrix without file names and words
        matrix: a matrix representing the counts of words in files
        k: int, k-value
        max_iter: int, maximum number of iterations
        initMethod: str, method of initialization: 'k++' or 'random'
        n_init: int, number of iterations with different centroids
        tolerance: float, relative tolerance with regard to inertia to declare convergence
        DocTermSparseMatrix: sparse matrix of the word counts
        metric_dist: str, method of the distance metrics


    Returns:
        kmeansIndex: a numpy array of the cluster index for each sample 
        siltteScore: float, silhouette score
    """

    """Parameters for KMeans (SKlearn)"""
    # n_clusters: int, optional, default: 8
    #             namely, K;  number of clusters to form OR number of centroids to generate
    # max_iter :  int
    #             Maximum number of iterations of the k-means algorithm for a single run
    # n_init :    int, optional, default: 10
    #             Number of time the k-means algorithm will be run with different centroid seeds
    # init :      'k-means++', 'random' or an ndarray
    #             method for initialization; 
    #            'k-means++': selects initial cluster centers for k-mean clustering in a smart way to speed up convergence
    # precompute_distances : boolean
    # tol :       float, optional default: 1e-4
    #             Relative tolerance w.r.t. inertia to declare convergence
    # n_jobs :    int
    #             The number of jobs to use for the computation
    #             -1 : all CPUs are used
    #             1 : no parallel computing code is used at all; useful for debugging
    #             For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. 
    #             -2 : all CPUs but one are used.

    

    inequality = '≤'.decode('utf-8')
    # trap bad silhouette score input

    #Convert from sparse matrix
    data= DocTermSparseMatrix.toarray()


    #coordinates for each cluster
    reduced_data = PCA(n_components=2).fit_transform(data)

    coordList=reduced_data.tolist()


    #Run fit_predict 100 times and find the most common combo to account for variation

    combosDict= {}

    for i in xrange(0,300):
        kmeans = KMeans(init= initMethod, n_clusters=k, n_init=n_init)
        kmeansIndex = kmeans.fit_predict(reduced_data)
        item= kmeansIndex.tolist()
        combo= ' '.join(str(x) for x in item)
        if combo in combosDict:
            combosDict[combo]+=1
        else:
            combosDict[combo]=1

    values=list(combosDict.values())
    keys=list(combosDict.keys())
    bestKey=keys[values.index(max(values))]
    stringIndex= bestKey.split()

    bestIndex=[]
    for x in stringIndex:
        bestIndex.append(int(x))



    if k <= 2:
        siltteScore = "N/A [Not available for K " + inequality + " 2]"

    elif (k > (matrix.shape[0]-1)):
        siltteScore = 'N/A [Not available if (K value) > (number of active files - 1)]'

    else:
        kmeans.fit(NumberOnlymatrix)
        labels = kmeans.labels_  # for silhouette score
        siltteScore = getSiloutteOnKMeans(labels, matrix, metric_dist)

    return bestIndex, siltteScore # integer ndarray with shape (n_samples,) -- label[i] is the code or index of the centroid the i'th observation is closest to
Developer: jarthorn, Project: Lexos, Lines of code: 95, Source: KMeans.py
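
For the K > 2 branch, the silhouette check can be sketched directly with scikit-learn's silhouette_score (the Lexos helper getSiloutteOnKMeans is not reproduced here, and the data below is made up):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

data = np.random.RandomState(5).rand(40, 8)   # toy feature matrix
k = 4
kmeans = KMeans(n_clusters=k, n_init=10).fit(data)
score = silhouette_score(data, kmeans.labels_, metric='euclidean')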

Example 7: plotPCAKMeansLearner

# Required import: from sklearn.decomposition import PCA [as alias]
# Or: from sklearn.decomposition.PCA import tolist [as alias]
def plotPCAKMeansLearner(ds,num_clusters=10):
    ''' Takes a kmeans learner and ds, outputs
        a virinoi diagram with each input point plotted '''

    #dimension reduction steps
    examples = [row[:-1] for row in ds.examples]
    norm_examples = scale(examples).tolist()    
    reduced_data = PCA(n_components=2).fit_transform(norm_examples)
    reduced_data_with_output = [reduced_data.tolist()[i]+[row[-1]] for i,row in enumerate(ds.examples)]        

    #instantiate and fit kmeans object
    pca_kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
    pca_kmeans.fit(reduced_data)

    #get the boundaries of our grid
    x_min, x_max = reduced_data[:, 0].min() + 1, reduced_data[:, 0].max() - 1
    y_min, y_max = reduced_data[:, 1].min() + 1, reduced_data[:, 1].max() - 1

    #create our background grid
    plot = pl.subplot(111)
    plot.patch.set_facecolor('black')
    pl.xlim([x_min,x_max])
    pl.ylim([y_min,y_max])

    #create a dictionary of the cluster to img_list
    img_dict = defaultdict(list)
    for i,row in enumerate(ds.examples):

        #get the cluster
        cluster = pca_kmeans.labels_[i]

        #get the img and img coordinates
        coordinates = reduced_data[i]
        img = np.array(ds.examples[i][:-1]).reshape((16,16))

        #for each cluster append the img to its value
        img_dict[cluster].append((img,coordinates))


    color_map = {0: [1.0,0.0,0.0],
                 1: [0.93,0.57,0.13],
                 2: [1.0,0.8431,0.0],
                 3: [.1961,.8039,.1961],
                 4: [0,1.0,0],
                 5: [0.0,0.8078,.8196],
                 6: [.098,.098,.4392],
                 7: [.6275,.1255,.9412],
                 8: [1.0,.4118,.7059],
                 9: [1.0,1.0,1.0] }

    #plot a subset of images for each cluster
    for cluster,v in img_dict.items():

        #sample 10 images
        img_list = random.sample(v,10)

        #plot each subset 
        for i,img_and_coord in enumerate(v):            
            if i < 10:                

                color_img = img_list[i][0].tolist()

                #change colors
                for j,row in enumerate(color_img):
                    for k,val in enumerate(row):
                        if val == 1.0:
                            color_img[j][k] = color_map[cluster]
                        else:
                            color_img[j][k] = [0.0,0.0,0.0]
                
                osb_img = osb.OffsetImage(np.array(color_img),zoom=1.5)
                xy = img_list[i][1].tolist()
                ab = osb.AnnotationBbox(osb_img,xy,pad=0,xycoords='data')                                  
                plot.add_artist(ab)

    #draw and show the plot
    pl.draw()
    pl.show()
Developer: evancasey, Project: cs158-machine-learning, Lines of code: 80, Source: assignment4.py
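
A much smaller plotting sketch of the same idea (assumed toy data; it draws plain scatter points rather than the colored 16x16 digit thumbnails used above): color each PCA-projected sample by its KMeans cluster.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

examples = np.random.RandomState(6).rand(100, 256)   # stand-in for flattened 16x16 images
reduced_data = PCA(n_components=2).fit_transform(scale(examples))
labels = KMeans(init='k-means++', n_clusters=10, n_init=10).fit_predict(reduced_data)

plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels, cmap='tab10', s=15)
plt.title('PCA projection colored by KMeans cluster')
plt.show()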


Note: The sklearn.decomposition.PCA.tolist examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any distribution or use should follow the license of the corresponding project. Do not reproduce this article without permission.