本文整理汇总了Python中sklearn.cluster.DBSCAN.fit_predict方法的典型用法代码示例。如果您正苦于以下问题:Python DBSCAN.fit_predict方法的具体用法?Python DBSCAN.fit_predict怎么用?Python DBSCAN.fit_predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.DBSCAN
的用法示例。
在下文中一共展示了DBSCAN.fit_predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: plot_dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def plot_dbscan():
    """Visualize DBSCAN cluster assignments over a grid of parameter settings.

    Builds a tiny blob dataset (12 samples), then fits DBSCAN for every
    combination of min_samples in (2, 3, 5) and eps in (1, 1.5, 2, 3),
    drawing the resulting assignments in a 3x4 grid of subplots.
    Noise points (label -1) are drawn in white; core samples are re-drawn
    on top with larger markers.
    """
    X, y = make_blobs(random_state=0, n_samples=12)
    # NOTE: the original also ran a default-parameter DBSCAN here and left a
    # bare `clusters` expression (a notebook leftover); both were dead code
    # because the loop below overwrites the result.
    fig, axes = plt.subplots(3, 4, figsize=(11, 8),
                             subplot_kw={'xticks': (), 'yticks': ()})
    # Plot clusters as red, green and blue, and outliers (-1) as white
    colors = ['r', 'g', 'b']
    markers = ['o', '^', 'v']
    # iterate over settings of min_samples and eps
    for i, min_samples in enumerate([2, 3, 5]):
        for j, eps in enumerate([1, 1.5, 2, 3]):
            # instantiate DBSCAN with a particular setting
            dbscan = DBSCAN(min_samples=min_samples, eps=eps)
            # get cluster assignments
            clusters = dbscan.fit_predict(X)
            print("min_samples: %d eps: %f cluster: %s" % (min_samples, eps, clusters))
            # prepend white/circle entries for the noise label when present
            if np.any(clusters == -1):
                c = ['w'] + colors
                m = ['o'] + markers
            else:
                c = colors
                m = markers
            discrete_scatter(X[:, 0], X[:, 1], clusters, ax=axes[i, j],
                             c=c, s=8, markers=m)
            inds = dbscan.core_sample_indices_
            # visualize core samples on top of the cluster assignments
            if len(inds):
                discrete_scatter(X[inds, 0], X[inds, 1], clusters[inds],
                                 ax=axes[i, j], s=15, c=colors,
                                 markers=markers)
            axes[i, j].set_title("min_samples: %d eps: %.1f" % (min_samples, eps))
    fig.tight_layout()
示例2: dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def dbscan(similarity, concepts=2, euclid=False):
    """Cluster with DBSCAN (eps=0.6, min_samples=10).

    When `euclid` is true, `similarity` is treated as a feature matrix and
    clustered directly. Otherwise `similarity` is a similarity matrix; it is
    converted to a distance matrix (1 - similarity) and clustered with the
    precomputed metric. `concepts` is accepted for interface compatibility
    but not used by DBSCAN.
    """
    if euclid:
        clusterer = DBSCAN(eps=0.6, min_samples=10,
                           algorithm='auto', leaf_size=30)
        return clusterer.fit_predict(similarity)
    clusterer = DBSCAN(eps=0.6, min_samples=10, metric='precomputed',
                       algorithm='auto', leaf_size=30)
    return clusterer.fit_predict(1 - similarity)
示例3: search_charges
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def search_charges(self, data, z=0, threshold=30):
    """Locate charges as local extrema of `data` and group them with DBSCAN.

    Uses the derivative stack from deriv(data, z) to take one Newton step
    toward each extremum, keeps candidates whose sub-pixel offset is within
    one pixel and whose interpolated magnitude exceeds `threshold`, clusters
    the candidate positions (eps=1, min_samples=1), and stores per-cluster
    mean positions and magnitudes via self.set_charges. Returns self.
    """
    A = deriv(data, z)
    print('Searching charges...')
    time0 = time.time()
    # Hessian determinant and Newton step (dx, dy) toward each extremum
    det = A[3]*A[5] - A[4]**2
    dx = -(A[1]*A[5] - A[2]*A[4])/det
    # BUG FIX: the original read `Aa[4]` here -- an undefined name; the
    # symmetric counterpart of the dx formula uses A[4].
    dy = -(A[2]*A[3] - A[1]*A[4])/det
    # second-order Taylor estimate of the extremum's value
    datamax = A[0] + A[1]*dx + A[2]*dy + A[3]*dx**2/2 + A[4]*dx*dy + A[5]*dy**2/2
    t = np.where((np.abs(dx) < 1)*(np.abs(dy) < 1)*(np.abs(datamax) > threshold)*(det > 0))
    # sub-pixel candidate coordinates (x, y)
    x = np.array([t[1] + dx[t], t[0] + dy[t]]).T
    db = DBSCAN(min_samples=1, eps=1)
    db.fit_predict(x)
    # min_samples=1 means no noise label, so labels run 0..n_charges-1
    n_charges = np.max(db.labels_) + 1
    qi = np.zeros(n_charges)
    xi = np.zeros((3, n_charges))
    for i in range(0, n_charges):
        xi[0:2, i] = np.mean(x[db.labels_ == i, :], axis=0)
        qi[i] = np.mean(datamax[t][db.labels_ == i])
    self.set_charges(qi, xi)
    print('Done! Elapsed time: ' + str(time.time() - time0))
    return self
示例4: _fit_dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def _fit_dbscan(self, x):
# clustering
for r in xrange(self.repeats):
# info
if self.debug is True:
print '\t[%s][c:%d][r:%d]' % (self.clus_type, k, r + 1),
# fit and evaluate model
model = DBSCAN(eps=1.0, min_samples=100)
model.fit_predict(x)
k = len(set(model.labels_)) - (1 if -1 in model.labels_ else 0)
self._labels[r] = model.labels_
self._parameters[r] = model.core_sample_indices_
# build equivalent gmm
model_gmm = GMM(n_components=k, covariance_type="full")
model_gmm.means_ = model.core_sample_indices_
model_gmm.covars_ = sp.ones(
(k, self.input_dim)) * self.sigma_factor
model_gmm.weights_ = sp.array(
[(self._labels[r] == i).sum() for i in xrange(k)])
# evaluate goodness of fit
self._ll[r] = model_gmm.score(x).sum()
if self.gof_type == 'aic':
self._gof[r] = model_gmm.aic(x)
if self.gof_type == 'bic':
self._gof[r] = model_gmm.bic(x)
# debug info
if self.debug is True:
print self._gof[r]
示例5: current_datapoints_dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def current_datapoints_dbscan(self):
    """
    Cluster the current outlier datapoints into slice-clusters with DBSCAN.

    Concatenates ids / coordinates / weights across every net in
    self.current_datapoints, runs DBSCAN (eps=self.eps, min_samples=5),
    discards noise points (label -1), and groups the rest by cluster label.
    Returns a dict mapping cluster label to a list of point dicts
    (id / lng / lat / weight / is_core); an empty dict when no points exist.
    """
    nets = self.current_datapoints.keys()
    ids = concatenate([self.current_datapoints[x]['ids'] for x in nets])
    coords = concatenate([self.current_datapoints[x]['array'] for x in nets])
    weights = concatenate([self.current_datapoints[x]['weights'] for x in nets])
    if len(ids) == 0:
        return {}
    clustering = DBSCAN(eps=self.eps, min_samples=5)
    labels = clustering.fit_predict(coords)
    core_ids = ids[clustering.core_sample_indices_]
    # drop noise points before grouping
    keep = labels > -1
    ids = ids[keep]
    coords = coords[keep]
    weights = weights[keep]
    labels = labels[keep]
    ret_tab = {}
    for label, point_id, point, weight in zip(labels, ids, coords, weights):
        ret_tab.setdefault(label, []).append({
            'id': point_id,
            'lng': point[0],
            'lat': point[1],
            'weight': weight,
            'is_core': point_id in core_ids,
        })
    return ret_tab
示例6: cluster_DBSCAN
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def cluster_DBSCAN(args):
    """
    Cluster vectorized graph instances with DBSCAN.

    Loads graphs from args.input_file, vectorizes them, reduces
    dimensionality with TruncatedSVD, runs DBSCAN (eps=args.eps), and saves
    the fitted vectorizer plus the predicted labels to args.output_dir_path.

    NOTE: the original docstring described "Ward hierarchical clustering:
    constructs a tree and cuts it", which does not match the code -- DBSCAN
    is density-based and builds no tree.
    """
    # load data
    g_it = node_link_data.node_link_data_to_eden(input=args.input_file, input_type="file")
    vec = graph.Vectorizer(r=args.radius, d=args.distance, nbits=args.nbits)
    logger.info('Vectorizer: %s' % vec)
    X = vec.transform(g_it, n_jobs=args.n_jobs)
    logger.info('Instances: %d Features: %d with an avg of %d features per instance' % (X.shape[0], X.shape[1], X.getnnz() / X.shape[0]))
    # project to lower dimensional space to use clustering algorithms
    transformer = TruncatedSVD(n_components=args.n_components)
    X_dense = transformer.fit_transform(X)
    # log statistics on data
    logger.info('Dimensionality reduction Instances: %d Features: %d with an avg of %d features per instance' % (X_dense.shape[0], X_dense.shape[1], X.getnnz() / X.shape[0]))
    # clustering
    clustering_algo = DBSCAN(eps=args.eps)
    y = clustering_algo.fit_predict(X_dense)
    msg = 'Predictions statistics: '
    msg += util.report_base_statistics(y)
    logger.info(msg)
    # save model for vectorizer
    out_file_name = "vectorizer"
    eden_io.dump(vec, output_dir_path=args.output_dir_path, out_file_name=out_file_name)
    logger.info("Written file: %s/%s", args.output_dir_path, out_file_name)
    # save result
    out_file_name = "labels"
    eden_io.store_matrix(matrix=y, output_dir_path=args.output_dir_path, out_file_name=out_file_name, output_format="text")
    logger.info("Written file: %s/%s", args.output_dir_path, out_file_name)
示例7: cluster_with_dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def cluster_with_dbscan(vectors, epsilon=0.5, min_samples=5, distances=None, metric="euclidean"):
    """Cluster `vectors` with DBSCAN over a precomputed distance matrix.

    If `distances` is not supplied it is computed with pairwise_distances
    across all cores (n_jobs=-1), which is faster than letting DBSCAN
    compute distances itself. Returns the array of cluster labels.
    """
    if distances is None:
        # precompute in parallel rather than inside DBSCAN
        distances = pairwise_distances(vectors, n_jobs=-1, metric=metric)
    model = DBSCAN(eps=epsilon, min_samples=min_samples, metric="precomputed")
    labels = model.fit_predict(distances)
    return labels
示例8: dbscan_outliers
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def dbscan_outliers(df):
    """
    Find outliers (noise points) using DBSCAN.

    Parameters
    ----------
    df: A pandas.DataFrame

    Returns
    -------
    A tuple of (a sklearn.DBSCAN instance, a pandas.DataFrame)
    """
    scaler = StandardScaler()
    scaler.fit(df)
    scaled = scaler.transform(df)
    dbs = DBSCAN()
    db = dbs.fit(scaled)
    # BUG FIX: the original refit via a second fit_predict call, then kept
    # rows with np.nonzero(labels) -- i.e. every label != 0, which mixes
    # noise (-1) with clusters 1, 2, ... DBSCAN marks noise with label -1,
    # so select exactly those rows. Also replaces the long-removed
    # DataFrame.ix indexer with a boolean mask.
    labels = db.labels_
    df_o = df[labels == -1]
    return db, df_o
示例9: _cluster
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def _cluster(params):
    """Cluster the shared precomputed distance matrix and score the result.

    Reads the method name and the matrix from the shared-constant store
    `sh`. Only 'dbscan' is implemented; 'kmedoid' and unknown methods
    abort via assert. When the label count allows it, silhouette and
    calinski-harabaz scores are computed. Returns (labels, perf_dict).
    """
    method = sh.getConst('method')
    clusterer = None
    if method == 'kmedoid':
        assert False
        # from kmedoid import kmedsoid
        # clusterer = kmedoid
    elif method == 'dbscan':
        from sklearn.cluster import DBSCAN
        clusterer = DBSCAN(eps=params['eps'],
                           min_samples=params['min_samples'],
                           metric='precomputed')
    else:
        assert False, 'FATAL: unknown cluster method'
    ##
    distMat = sh.getConst('mat')
    labels = clusterer.fit_predict(distMat)
    nLabels = len(set(labels))
    ##
    # scores are defined only for 2 <= nLabels <= nSamples - 1
    sil = cal = None
    if 2 <= nLabels <= len(labels) - 1:
        sil = met.silhouette_score(distMat, labels, 'precomputed')
        cal = met.calinski_harabaz_score(distMat, labels)
    perf = dict(silhouette_score=sil, calinski_harabaz_score=cal)
    return (labels, perf)
示例10: DBScan_Flux
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def DBScan_Flux(phots, ycenters, xcenters, dbsClean=0, useTheForce=False):
    """Flag photometry samples that belong to the 'clean' DBSCAN cluster.

    Standard-scales the y-centers, x-centers, and fluxes (non-finite flux
    values are replaced by the median of the finite ones), clusters the
    3-feature points with a default DBSCAN, and returns a boolean mask.

    Args:
        phots: array of flux measurements; flattened before use.
        ycenters: per-sample y centroid positions.
        xcenters: per-sample x centroid positions.
        dbsClean: label of the cluster considered clean (default 0).
        useTheForce: accepted for interface compatibility; not used here.

    Returns:
        Boolean array, True where DBSCAN assigned the sample to `dbsClean`.
    """
    dbsPhots = DBSCAN()  # n_jobs=-1
    stdScaler = StandardScaler()
    phots = np.copy(phots.ravel())
    # impute non-finite fluxes with the median of the finite ones
    phots[~np.isfinite(phots)] = np.median(phots[np.isfinite(phots)])
    featuresNow = np.transpose([
        stdScaler.fit_transform(ycenters[:, None]).ravel(),
        stdScaler.fit_transform(xcenters[:, None]).ravel(),
        stdScaler.fit_transform(phots[:, None]).ravel(),
    ])
    dbsPhotsPred = dbsPhots.fit_predict(featuresNow)
    return dbsPhotsPred == dbsClean
示例11: get_clusters
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def get_clusters(tracks):
    """Assign each track to a DBSCAN cluster using its mean position.

    Reads eps and min_samples from the GUI spin boxes
    (g.m.neighborDistanceSpin / g.m.neighborsSpin) and clusters the
    (mean_x, mean_y) points. Returns the array of cluster labels.
    """
    min_neighbors = g.m.neighborsSpin.value()
    max_dist = g.m.neighborDistanceSpin.value()
    points = np.array([[track['mean_x'], track['mean_y']] for track in tracks])
    labels = DBSCAN(eps=max_dist, min_samples=min_neighbors).fit_predict(points)
    return labels
示例12: cluster_dbscan
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def cluster_dbscan(matrix, distance_measure="sts", eps=1):
    """Run DBSCAN (min_samples=2) over `matrix` for a given epsilon.

    When `distance_measure` is "sts" (short time-series distance), `matrix`
    must be the precomputed sts distance matrix and the precomputed metric
    is used. For any other measure, `matrix` is the ngenes x nsamples
    time-series matrix and `distance_measure` may be any metric available
    in scikit-learn (e.g. 'cityblock', 'cosine', 'euclidean', 'l1', 'l2',
    'manhattan', 'braycurtis', 'canberra', 'chebyshev', 'correlation',
    'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching',
    'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
    'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'). Multiple
    time-series are NOT supported for distances other than "sts".

    Returns
    -------
    cluster_labels: list of int
        A list of size ngenes that defines cluster membership.
    """
    metric = 'precomputed' if distance_measure == "sts" else distance_measure
    model = DBSCAN(eps=eps, metric=metric, min_samples=2)
    cluster_labels = model.fit_predict(matrix)
    return cluster_labels
示例13: cluster_lvl1
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def cluster_lvl1(self, data):
    """Cluster every map cell by (row, col, value) and return a label map.

    Builds one feature row per cell of the map grid -- its coordinates plus
    the corresponding value from `data` -- computes the level-1 distance
    matrix via self.distances_for_lvl1, runs DBSCAN on the precomputed
    distances, and reshapes the labels back to (map_height, map_width).
    """
    grid = np.mgrid[:self.map_height, :self.map_width].reshape(2, -1)
    features = np.float32(np.vstack([grid, data.ravel()])).T
    dist = self.distances_for_lvl1(features)
    clusterer = DBSCAN(eps=2., min_samples=2, metric='precomputed')
    labels = clusterer.fit_predict(dist)
    return labels.reshape(self.map_height, self.map_width)
示例14: regroup
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def regroup(self, maxdistance, minsize, algo='auto'):
    """Re-cluster self.primarylist with DBSCAN and log the group count.

    Parameters map directly onto DBSCAN: maxdistance -> eps,
    minsize -> min_samples, algo -> neighbor-search algorithm.
    """
    self.__loginfo('Regrouping')
    # The original called fit() and then fit_predict() on the same data,
    # fitting the model twice; one fit_predict yields identical labels.
    dbsresult = DBSCAN(eps=maxdistance, min_samples=minsize,
                       algorithm=algo).fit_predict(self.primarylist)
    # set() collects the distinct labels directly, replacing the manual
    # membership-test accumulation loop.
    grouplist = set(dbsresult)
    self.__loginfo('Group label count: %s' % len(grouplist))
示例15: main
# 需要导入模块: from sklearn.cluster import DBSCAN [as 别名]
# 或者: from sklearn.cluster.DBSCAN import fit_predict [as 别名]
def main(datafile, feature1, feature2, normalize, clusteroutput, percentile, copula):
    """Cluster a 2-feature subsample of `datafile` with DBSCAN and plot it.

    Reads the data, subsamples 10000 rows, optionally z-score normalizes or
    copula-transforms the two features, clusters with DBSCAN, writes
    (id, cluster) pairs to `clusteroutput`, and shows a scatter plot
    colored by cluster label.
    """
    X, features = read_sah_h5(datafile, just_good=False)
    if 'id' not in features:
        ids = np.arange(len(X))
    else:
        ids = X[:, features.index('id')]
    x = X[:, features.index(feature1)]
    y = X[:, features.index(feature2)]
    D = np.column_stack([x, y])
    # NOTE(review): randint samples WITH replacement, so rows may repeat
    idx = np.random.randint(len(X), size=10000)
    D = D[idx]
    ids = ids[idx]
    if normalize:
        mean = np.average(D, axis=0)
        std = np.std(D, axis=0)
        std[np.nonzero(std == 0.0)] = 1.0  # Avoid NaNs
        Dnorm = (D - mean) / std
    elif copula:
        Dnorm = np.column_stack([copula_transform(f) for f in D.T])
    else:
        Dnorm = D
    # alternative models previously tried, kept for reference:
    # kmeans = MiniBatchKMeans(n_clusters=50)
    # gmm = GMM(n_components=200, covariance_type='full', verbose=True)
    # C = gmm.fit_predict(Dnorm)
    dbscan = DBSCAN(eps=100.0, min_samples=1)
    C = dbscan.fit_predict(Dnorm)
    print(C)  # fixed: was a Python 2 print statement
    with open(clusteroutput, 'w+') as f:
        for c, i in zip(C, ids):
            f.write('%d,%d\n' % (i, c))
    pl.scatter(D[:, 0], D[:, 1], color=pl.cm.spectral(C.astype(float) / np.max(C)))
    # for c in np.unique(C):
    #     pl.bar(0, 0, lw=0, ec='none',
    #            fc=pl.cm.spectral(float(c) / np.max(C)), label='Cluster %d' % c)
    # pl.legend(loc='upper left')
    if percentile > 0:
        pl.xlim(
            scoreatpercentile(x, percentile),
            scoreatpercentile(x, 100 - percentile)
        )
        pl.ylim(
            scoreatpercentile(y, percentile),
            scoreatpercentile(y, 100 - percentile)
        )
    pl.xlabel(feature1)
    pl.ylabel(feature2)
    pl.show()