

Python WardAgglomeration.transform Method Code Examples

This article collects typical usage examples of the Python sklearn.cluster.WardAgglomeration.transform method. If you are wondering exactly what WardAgglomeration.transform does, how to call it, or what real-world uses look like, the curated code examples below should help. You can also browse further usage examples of the containing class, sklearn.cluster.WardAgglomeration.


The following presents 7 code examples of the WardAgglomeration.transform method, ordered by popularity by default.
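Before turning to the examples, here is a minimal, self-contained sketch of the typical fit / transform / inverse_transform round trip. It is not taken from any of the projects below, and it assumes an old scikit-learn release that still ships WardAgglomeration (later releases replace it with sklearn.cluster.FeatureAgglomeration using Ward linkage):

import numpy as np
from sklearn.cluster import WardAgglomeration
from sklearn.feature_extraction.image import grid_to_graph

rng = np.random.RandomState(0)
X = rng.randn(20, 100)                    # 20 samples, 100 features laid out on a 10x10 grid
connectivity = grid_to_graph(10, 10)      # only neighbouring grid features may be merged

ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
ward.fit(X)
X_reduced = ward.transform(X)             # shape (20, 5): one column per feature cluster
X_restored = ward.inverse_transform(X_reduced)  # back to (20, 100), constant within each cluster
print(X_reduced.shape, X_restored.shape)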

Example 1: test_ward_agglomeration

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    assert_true(np.unique(Xfull[0]).size == 5)
    assert_array_almost_equal(ward.transform(Xfull), Xred)
Author ID: 2011200799, Project: scikit-learn, Lines of code: 19, Source file: test_hierarchical.py

Example 2: prepare_data

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    # data preparation
    Z = nifti_masker.fit_transform(imgs)
    pca = RandomizedPCA(n_components=n_components)
    Z_ = pca.fit_transform(Z.T).T
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory='nilearn_cache').fit(Z_)
    W = ward.transform(Z)
    del Z
    # data cube is a more convenient representation
    cube = np.array([W[subject_label == subject]
                     for subject in np.arange(n_subjects)])
    # parcel connectivity
    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
Author ID: bthirion, Project: fMRI_PCR, Lines of code: 17, Source file: script_localizer.py

Example 3: representation

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
first_epi = nifti_masker.inverse_transform(fmri_masked[0]).get_data()
first_epi = np.ma.masked_array(first_epi, first_epi == 0)
# Outside the mask: a uniform value, smaller than inside the mask
first_epi[np.logical_not(mask)] = 0.9 * first_epi[mask].min()
vmax = first_epi[..., 20].max()
vmin = first_epi[..., 20].min()
pl.imshow(np.rot90(first_epi[..., 20]),
          interpolation='nearest', cmap=pl.cm.spectral, vmin=vmin, vmax=vmax)
pl.axis('off')
pl.title('Original (%i voxels)' % fmri_masked.shape[1])

# A reduced version of the data can be created by taking the parcel-level average:
# Note that, as with many objects in scikit-learn, the ward object exposes
# a transform method that modifies the input features. Here it reduces their
# dimensionality.
fmri_reduced = ward.transform(fmri_masked)

# Display the corresponding data compressed using the parcellation
fmri_compressed = ward.inverse_transform(fmri_reduced)
compressed = nifti_masker.inverse_transform(
    fmri_compressed[0]).get_data()
compressed = np.ma.masked_equal(compressed, 0)


pl.figure()
pl.imshow(np.rot90(compressed[:, :, 20]),
          interpolation='nearest', cmap=pl.cm.spectral, vmin=vmin, vmax=vmax)
pl.title('Compressed representation (2000 parcels)')
pl.axis('off')
pl.show()
Author ID: jcketz, Project: nilearn, Lines of code: 32, Source file: plot_rest_clustering.py
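The comment in Example 3 describes transform as a parcel-level average. As a sanity check of that reading (a sketch only; it assumes the default mean pooling and that output columns follow np.unique(ward.labels_), which matches old scikit-learn behaviour), the reduction could be reproduced by hand, reusing ward and fmri_masked from the snippet above:

import numpy as np

# Hypothetical check: rebuild the parcel-level averages manually and compare
# them with ward.transform. Assumes mean pooling over each Ward parcel.
X_red = ward.transform(fmri_masked)
manual = np.column_stack([fmri_masked[:, ward.labels_ == label].mean(axis=1)
                          for label in np.unique(ward.labels_)])
print(np.allclose(X_red, manual))  # expected True under these assumptions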

Example 4:

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
labels[mask] = ward.labels_

cut = labels[:, :, 20].astype(int)
colors = np.random.random(size=(ward.n_clusters + 1, 3))
colors[-1] = 0
pl.axis('off')
pl.imshow(colors[cut], interpolation='nearest')
pl.title('Ward parcellation')

# Display the original data
pl.figure()
first_epi_img = epi_img[..., 0].copy()
first_epi_img[np.logical_not(mask)] = 0
pl.imshow(first_epi_img[..., 20], interpolation='nearest',
           cmap=pl.cm.spectral)
pl.axis('off')
pl.title('Original')

# Display the corresponding data compressed using the parcellation
X_r = ward.transform(epi_masked.T)
X_c = ward.inverse_transform(X_r)
compressed_img = np.zeros(mask.shape)
compressed_img[mask] = X_c[0]

pl.figure()
pl.imshow(compressed_img[:, :, 20], interpolation='nearest',
           cmap=pl.cm.spectral)
pl.title('Compressed representation')
pl.axis('off')
pl.show()
Author ID: dengemann, Project: nisl.github.com, Lines of code: 32, Source file: plot_nyu_rest_clustering.py

Example 5: feature_extractor

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
def feature_extractor(imgfile, maskfile, featurefile, maskerfile, wardfile, nclusters=[1000,], selectfile=None, targetfile=None, metafile=None, cachefile=None):
    
    resultdict = {"imgfile":imgfile, "maskfile":maskfile}
    # load data
    print "--loading data"
    nifti_masker = input_data.NiftiMasker(mask=maskfile, memory=cachefile, memory_level=1,
                              standardize=False)
    fmri_masked = nifti_masker.fit_transform(imgfile)
    print "--getting mask"
    mask = nifti_masker.mask_img_.get_data().astype(np.bool)
    
    # saveit
    joblib.dump(nifti_masker, maskerfile)
    resultdict["mask"]  = mask
    resultdict["Xmask"] = fmri_masked
    resultdict["maskerfile"] = maskerfile
    
    # get connectivity
    print "--getting connectivity"
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)
    # saveit
    resultdict["connectivity"]    = connectivity
    print "--save main file"
    np.savez(featurefile+"_main.npz", **resultdict)
    
    # run  ward
    y     = np.load(targetfile)["ymap"]
    meta  = np.load(metafile)
    train = meta["train"]
    test  = meta["test"]
    ncv   = meta['ycv']
    
    # for each cv set
    for cvx in range(ncv):
        trainidx = train[cvx]
        testidx  = test[cvx]
        resultdict = {}        
        wardfiles = []
        selectfiles = []
        print "--Running ward %d"%(cvx, )
        for ix, nc in enumerate(nclusters):
            ward = WardAgglomeration(n_clusters=nc, connectivity=connectivity, memory=cachefile)
            ward.fit(fmri_masked[trainidx])
            fmri_reduced_train = ward.transform(fmri_masked[trainidx])
            fmri_reduced_test  = ward.transform(fmri_masked[testidx])
            
            # saveit
            subwardfile = wardfile+"_D%d_cv%d.pkl"%(nc, cvx,)
            joblib.dump(ward, subwardfile)
            resultdict["Xward_%d_train"%(nc,)] = fmri_reduced_train
            resultdict["Xward_%d_test"%(nc,)]  = fmri_reduced_test
            wardfiles.append(subwardfile)
            
            # additional feature selection
            selector = SelectPercentile(f_classif, percentile=30)
            selector.fit(fmri_reduced_train, y[trainidx])
            fmri_select_train = selector.transform(fmri_reduced_train)
            fmri_select_test  = selector.transform(fmri_reduced_test)
            
            # saveit
            subselectfile = selectfile+"_D%d_cv%d.pkl"%(nc, cvx,)
            joblib.dump(selector, subselectfile)
            resultdict["Xselect_%d_train"%(nc,)] = fmri_select_train
            resultdict["Xselect_%d_test"%(nc,)]  = fmri_select_test
            selectfiles.append(subselectfile)
            
        resultdict["wardfiles"]   = wardfiles
        resultdict["selectfiles"] = selectfiles
        
        # save results
        print "--save cv result"
        np.savez(featurefile+"_cv%d.npz"%(cvx, ), **resultdict)
Author ID: jdnc, Project: ml-project, Lines of code: 76, Source file: data_extraction.py

Example 6: classify

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs single-class
    classification with the given classifier using n-fold
    cross-validation. Constructs an OvO classifier for every pair of terms.
    
    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (1 x n_samples) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes' | 'svm' | 'logistic_regression' | 'ensemble'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to perform Ward clustering. Uses n_clusters = 10,000; change the global N_CLUSTERS for a
        different value. Defaults to True.
    n_folds : int
        the number of cross-validation folds
        
    Returns
    -------
    accuracy : list of `numpy.ndarray`
        One confusion matrix per fold, collected in a list that can be saved as a
        numpy array of arrays for further analysis.
    """
    clf = None
    ward = None
    le = preprocessing.LabelEncoder()
    le.fit(y)
    y_new = le.transform(y)
    
    # choose and assign appropriate classifier
    classifier_dict = { 'naive_bayes' : MultinomialNB(),
                        'logistic_regression' : LogisticRegression(penalty='l2'),
                        'svm' : GridSearchCV(LinearSVC(), [{'C': [1, 10, 100, 1000]}])  
                       }
    if classifier == 'ensemble':
        clf_nb = classifier_dict['naive_bayes']
        clf_svm = classifier_dict['svm']
        clf_lr = classifier_dict['logistic_regression']
    else:
        clf = classifier_dict[classifier]
        
    # perform ward clustering if specified    
    if clustering:
        mask = np.load('data/2mm_brain_mask.npy')
        shape = mask.shape
        connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity)
    
    # actual cross validation    
    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    accuracy = []
    for train, test in kf:
        x_train = x[train]
        y_train  = y_new[train]
        x_test = x[test]
        y_test = y_new[test] 
        if clustering:
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        if classifier != 'ensemble':        
            predicted = clf.fit(x_train, y_train).predict(x_test)
        else:
            predicted_nb = clf_nb.fit(x_train, y_train).predict(x_test)
            predicted_lr = clf_lr.fit(x_train, y_train).predict(x_test)
            predicted_svm = clf_svm.fit(x_train, y_train).predict(x_test)
            predicted = predicted_nb + predicted_lr + predicted_svm
            predicted = np.array(predicted >= 2, dtype=int)
        conf_mat =  confusion_matrix(y_test, predicted, labels=[0,1])
        accuracy.append(conf_mat)
    return accuracy
Author ID: jdnc, Project: ml-project, Lines of code: 77, Source file: single_label.py

Example 7: classify

# Required import: from sklearn.cluster import WardAgglomeration [as alias]
# Or: from sklearn.cluster.WardAgglomeration import transform [as alias]
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs multi-label 
    classification with the given classifier using n-fold
    cross-validation. Constructs an OvR classifier for multi-label prediction.
    
    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (n_samples x n_labels) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes' | 'decision_tree' | 'logistic_regression'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to perform Ward clustering. Uses n_clusters = 10,000; change the global N_CLUSTERS for a
        different value. Defaults to True.
    n_folds : int
        the number of cross-validation folds
        
    Returns
    -------
    score_per_class, score_per_label : tuple
        The results are returned as a tuple of two lists of per-fold dicts, with the keys naming the metrics.
    """
    clf = None
    ward = None
    
    lb = preprocessing.LabelBinarizer()
    y_new = lb.fit_transform(y)
    #specify connectivity for clustering
    mask = nb.load('data/MNI152_T1_2mm_brain.nii.gz').get_data().astype('bool')
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)
    ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity)
    
    # choose and assign appropriate classifier
    classifier_dict = { 'naive_bayes' : OneVsRestClassifier(MultinomialNB()),
                        'logistic_regression' : OneVsRestClassifier(LogisticRegression(penalty='l2')),
                        'decision_tree' : tree.DecisionTreeClassifier()
                       }
    
    clf = classifier_dict[classifier]
    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    score_per_class = []
    score_per_label = []
    for train, test in kf:
        x_train = np.ascontiguousarray(x[train])
        y_train = np.ascontiguousarray(y_new[train])
        x_test = np.ascontiguousarray(x[test])
        y_test = np.ascontiguousarray(y_new[test])
        if clustering: 
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        model = clf.fit(x_train, y_train)
        predicted  = model.predict(x_test)
        predict_prob = model.predict_proba(x_test)
        if isinstance(predict_prob, list):
            predict_prob = np.array(predict_prob)
        cls_scores = utils.score_results(y_test, predicted, predict_prob)
        label_scores = utils.label_scores(y_test, predicted, predict_prob)
        score_per_class.append(cls_scores)
        score_per_label.append(label_scores)
    return (score_per_class,score_per_label)
Author ID: jdnc, Project: ml-project, Lines of code: 68, Source file: multi_label.py


Note: The sklearn.cluster.WardAgglomeration.transform examples in this article were compiled by 纯净天空 from open-source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers, and copyright remains with the original authors; please follow each project's license when distributing or using the code. Do not reproduce this article without permission.