This article collects typical usage examples of the norm function from Python's sklearn.utils.extmath module. If you have been wondering what exactly norm does and how it is used in practice, the curated examples below should help.
The 15 code examples that follow are drawn from real projects and are ordered by popularity by default.
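Before the examples, here is a minimal standalone sketch of what is being exercised: extmath.norm(X) returned the Euclidean norm of X viewed as a flat vector (the Frobenius norm for 2-D arrays), and squared_norm(X) returns its square. Note that extmath.norm has since been removed from scikit-learn, so np.linalg.norm stands in for it below; this is an illustration, not the original implementation.

import numpy as np
from sklearn.utils.extmath import squared_norm

X = np.arange(6, dtype=float).reshape(2, 3)
frobenius = np.linalg.norm(X.ravel())      # what extmath.norm(X) computed
print(np.isclose(frobenius ** 2, squared_norm(X)))   # True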
Example 1: test_norm_squared_norm
import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.utils.extmath import norm, squared_norm

def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200
    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
Example 2: test_norm_squared_norm
# Same test as Example 1, extended with a check that squared_norm warns
# about potential np.dot overflow on integer input. assert_warns_message
# comes from scikit-learn's old sklearn.utils.testing helpers.
def test_norm_squared_norm():
    X = np.random.RandomState(42).randn(50, 63)
    X *= 100        # check stability
    X += 200
    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
    # Check the warning with an int array and np.dot potential overflow
    assert_warns_message(
        UserWarning, 'Array type is integer, np.dot may '
        'overflow. Data should be float type to avoid this issue',
        squared_norm, X.astype(int))
Example 3: mean_shift
def mean_shift(X, bandwidth, n_seeds, kernel_function='gaussian', max_iterations=100, proximity_thresh=5):
    '''
    ---Parameters---
    X : data in form (samples, dims)
    bandwidth : radius of the nearest-neighbor queries
    n_seeds : number of seed points, drawn at random from X
    kernel_function : "gaussian", "flat", or your own kernel update function
    proximity_thresh : minimum distance (in pixels) a new cluster must be away from previous ones
    ---Returns---
    cluster_centers : list of cluster centers found
    cluster_counts : how many pixels are within the neighborhood of each cluster
    '''
    import numpy as np
    from sklearn.neighbors import NearestNeighbors
    from sklearn.utils import extmath

    # gaussian_kernel and flat_kernel are kernel update functions assumed to
    # be defined elsewhere in the module (Example 15 defines an equivalent
    # gaussian one inline).
    if kernel_function == 'gaussian':
        kernel_update_function = gaussian_kernel
    elif kernel_function == 'flat':
        kernel_update_function = flat_kernel
    else:
        kernel_update_function = kernel_function

    n_points, n_features = X.shape
    stop_thresh = 1e-2 * bandwidth      # when the mean has converged
    cluster_centers = []
    cluster_counts = []
    # (a BallTree could be used instead of NearestNeighbors for the lookups)
    neighbors = NearestNeighbors(radius=bandwidth).fit(X)
    seeds = X[np.random.uniform(0, X.shape[0], n_seeds).astype(int)]

    # For each seed, climb the gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            points_within = X[neighbors.radius_neighbors([weighted_mean], bandwidth, return_distance=False)[0]]
            old_mean = weighted_mean    # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                # Only add the cluster if it is far enough from existing centers
                if len(cluster_centers) > 0:
                    diff_from_prev = [np.linalg.norm(weighted_mean - center, 2)
                                      for center in cluster_centers]
                    if np.min(diff_from_prev) > proximity_thresh:
                        cluster_centers.append(weighted_mean)
                        cluster_counts.append(points_within.shape[0])
                else:
                    cluster_centers.append(weighted_mean)
                    cluster_counts.append(points_within.shape[0])
                break
            completed_iterations += 1
    return cluster_centers, cluster_counts
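A hedged usage sketch for Example 3 follows, supplying a minimal gaussian_kernel of the shape the code expects (Example 15 below defines an equivalent one inline). The data, bandwidth, and seed count are illustrative, and the two-cluster result is the expected outcome for well-separated blobs, not a guarantee.

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

def gaussian_kernel(x, points, bandwidth):
    # weighted mean of the points, with gaussian weights centered on x
    distances = euclidean_distances(points, [x])
    weights = np.exp(-(distances ** 2) / bandwidth ** 2)
    return np.sum(points * weights, axis=0) / np.sum(weights)

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2), rng.randn(100, 2) + 10])
centers, counts = mean_shift(X, bandwidth=2.0, n_seeds=10)
print(len(centers))     # expected: 2 for two well-separated blobs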
Example 4: test_logistic_derivative_lipschitz_constant
def test_logistic_derivative_lipschitz_constant():
    # Tests Lipschitz continuity of the derivative of the logistic loss.
    # X, y, w and mask are module-level fixtures defined elsewhere in the
    # test file; assert_true is one of the old nose-style test helpers.
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _logistic_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand(w.shape[0] + 1) * rng.randint(1000)
        x_2 = rng.rand(w.shape[0] + 1) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_1, mask, grad_weight)
            - _logistic_data_loss_and_spatial_grad_derivative(
                X, y, x_2, mask, grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Example 5: _pa
def _pa(self, loss_t, x_t):
    denom = extmath.norm(x_t) ** 2.0
    # special case when the L2 norm of x_t is zero (follows the libol
    # implementation)
    if denom == 0:
        return 1
    d = loss_t / denom
    return d
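The quantity _pa returns is the classic Passive-Aggressive step size: the loss divided by the squared norm of the input. A hedged sketch of how such a step is typically applied in a PA-style update follows; the names w, x_t, y_t and the hinge loss are illustrative, not taken from the source.

import numpy as np

def pa_step(loss_t, x_t):
    denom = np.dot(x_t, x_t)            # squared L2 norm of x_t
    return 1.0 if denom == 0 else loss_t / denom

w = np.zeros(3)
x_t, y_t = np.array([1.0, 2.0, 0.0]), 1.0
loss_t = max(0.0, 1.0 - y_t * np.dot(w, x_t))    # hinge loss, 1.0 here
w += pa_step(loss_t, x_t) * y_t * x_t            # w becomes [0.2, 0.4, 0.0]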
Example 6: test__squared_loss_derivative_lipschitz_constant
def test__squared_loss_derivative_lipschitz_constant():
    # Tests Lipschitz continuity of the derivative of the squared loss
    # function. As in Example 4, X, y, w and mask are module-level test
    # fixtures.
    rng = check_random_state(42)
    grad_weight = 2.08e-1
    lipschitz_constant = _squared_loss_derivative_lipschitz_constant(
        X, mask, grad_weight)
    for _ in range(20):
        x_1 = rng.rand(*w.shape) * rng.randint(1000)
        x_2 = rng.rand(*w.shape) * rng.randint(1000)
        gradient_difference = extmath.norm(
            _squared_loss_and_spatial_grad_derivative(X, y, x_1, mask,
                                                      grad_weight)
            - _squared_loss_and_spatial_grad_derivative(X, y, x_2, mask,
                                                        grad_weight))
        point_difference = extmath.norm(x_1 - x_2)
        assert_true(
            gradient_difference <= lipschitz_constant * point_difference)
Example 7: _reorth
def _reorth(basis, target, rows=None, alpha=0.5):
    """Reorthogonalize a vector using iterated Gram-Schmidt.

    Parameters
    ----------
    basis : ndarray, shape (n_basis, n_features)
        The matrix whose rows form the basis to reorthogonalize against.
    target : ndarray, shape (n_features,)
        The target vector to be reorthogonalized.
    rows : {array-like, None}, default None
        Indices of rows from basis to use. Use all if None.
    alpha : float, default 0.5
        Parameter for determining whether to do a second reorthogonalization.

    Returns
    -------
    reorthed_target : ndarray, shape (n_features,)
        The reorthogonalized vector.
    """
    if rows is not None:
        basis = basis[rows]
    norm_target = norm(target)
    norm_target_old = 0
    n_reorth = 0
    while norm_target < alpha * norm_target_old or n_reorth == 0:
        for row in basis:
            t = fast_dot(row, target)   # fast_dot: BLAS dot from older scikit-learn
            target = target - t * row
        norm_target_old = norm_target
        norm_target = norm(target)
        n_reorth += 1
        if n_reorth > 4:
            # target lies in span(basis) => accept target = 0
            target = np.zeros_like(target)
            break
    return target
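A hedged usage sketch, with np.dot and np.linalg.norm standing in for the old extmath fast_dot/norm helpers the function expects as module-level names; the orthonormal basis here is illustrative.

import numpy as np
fast_dot, norm = np.dot, np.linalg.norm   # stand-ins for the old extmath helpers

basis = np.eye(3)[:2]                  # rows: e1 and e2
target = np.array([1.0, 1.0, 1.0])
# removing the e1 and e2 components leaves only the e3 component
print(_reorth(basis, target))          # -> [0. 0. 1.]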
Example 8: _bistochastic_normalize
def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):
    """Normalize rows and columns of ``X`` simultaneously so that all
    rows sum to one constant and all columns sum to a different
    constant.
    """
    # According to the paper, this can also be done more efficiently with
    # deviation reduction and balancing algorithms.
    # make_nonnegative and _scale_normalize are sibling helpers in
    # scikit-learn's spectral biclustering module.
    X = make_nonnegative(X)
    X_scaled = X
    dist = None
    for _ in range(max_iter):
        X_new, _, _ = _scale_normalize(X_scaled)
        # convergence is measured between successive iterates
        if issparse(X):
            dist = norm(X_scaled.data - X_new.data)
        else:
            dist = norm(X_scaled - X_new)
        X_scaled = X_new
        if dist is not None and dist < tol:
            break
    return X_scaled
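A hedged usage sketch for Example 8. The import paths for the private helpers are version-dependent (shown here as they appear in recent scikit-learn), and norm is aliased because extmath.norm no longer exists; treat this as a sketch, not a supported API.

import numpy as np
from scipy.sparse import issparse
from sklearn.utils.extmath import make_nonnegative
from sklearn.cluster._bicluster import _scale_normalize   # private, version-dependent

norm = np.linalg.norm   # stand-in for the removed extmath.norm

rng = np.random.RandomState(0)
X_scaled = _bistochastic_normalize(rng.rand(4, 6))
print(np.ptp(X_scaled.sum(axis=1)))   # ~0: all row sums equal
print(np.ptp(X_scaled.sum(axis=0)))   # ~0: all column sums equal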
Example 9: test_tikhonov_regularization_vs_graph_net
def test_tikhonov_regularization_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: with l1_ratio = 0
    # (pure smooth penalty), Graph-Net's solution is compared against the
    # analytical solution of Tikhonov regularization.
    # XXX A small dataset is used here (this test is very lengthy).
    # X, y, w, mask, mask_ and X_ are module-level fixtures; BaseSpaceNet
    # and get_gradient_matrix come from nilearn's decoding module.
    G = get_gradient_matrix(w.size, mask)
    optimal_model = np.dot(sp.linalg.pinv(
        np.dot(X.T, X) + y.size * np.dot(G.T, G)), np.dot(X.T, y))
    graph_net = BaseSpaceNet(
        mask=mask_, alphas=1. * X.shape[0], l1_ratios=0., max_iter=400,
        fit_intercept=False,
        screening_percentile=100., standardize=False)
    graph_net.fit(X_, y.copy())
    coef_ = graph_net.coef_[0]
    graph_net_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, coef_) - y) ** 2 \
        + 0.5 * extmath.norm(np.dot(G, coef_)) ** 2
    optimal_model_perf = 0.5 / y.size * extmath.norm(
        np.dot(X, optimal_model) - y) ** 2 \
        + 0.5 * extmath.norm(np.dot(G, optimal_model)) ** 2
    assert_almost_equal(graph_net_perf, optimal_model_perf, decimal=1)
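For reference, the closed form being tested here: the pure-smooth objective is E(w) = (1/2n) ||Xw - y||_2^2 + (1/2) ||Gw||_2^2 with n = y.size. Setting its gradient (1/n) X^T (Xw - y) + G^T G w to zero gives w* = (X^T X + n G^T G)^{-1} X^T y, which is exactly the optimal_model computed above via the pseudo-inverse.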
Example 10: test_lasso_vs_graph_net
def test_lasso_vs_graph_net():
    # Test for one of the extreme cases of Graph-Net: with l1_ratio = 1
    # (pure Lasso), Graph-Net's performance is compared with
    # scikit-learn's Lasso.
    lasso = Lasso(max_iter=100, tol=1e-8, normalize=False)
    graph_net = BaseSpaceNet(mask=mask, alphas=1. * X_.shape[0],
                             l1_ratios=1, is_classif=False,
                             penalty="graph-net", max_iter=100)
    lasso.fit(X_, y)
    graph_net.fit(X, y)
    lasso_perf = 0.5 / y.size * extmath.norm(np.dot(
        X_, lasso.coef_) - y) ** 2 + np.sum(np.abs(lasso.coef_))
    graph_net_perf = 0.5 * ((graph_net.predict(X) - y) ** 2).mean()
    np.testing.assert_almost_equal(graph_net_perf, lasso_perf, decimal=3)
Example 11: f_regression_nosparse
def f_regression_nosparse(X, y, center=True):
    """Univariate linear regression tests.

    Quick linear model for testing the effect of a single regressor,
    sequentially for many regressors.

    This is done in 3 steps:
    1. The regressor of interest and the data are orthogonalized
       with respect to constant regressors.
    2. The cross correlation between data and regressors is computed.
    3. It is converted to an F score, then to a p-value.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        The set of regressors that will be tested sequentially.
    y : array of shape (n_samples,)
        The target vector.
    center : bool, default True
        If True, X and y will be centered.

    Returns
    -------
    F : array, shape=(n_features,)
        F values of features.
    pval : array, shape=(n_features,)
        p-values of F-scores.
    """
    # check_arrays comes from older scikit-learn; later versions replaced
    # it with check_X_y / check_array.
    X, y = check_arrays(X, y, dtype=float)
    y = y.ravel()
    if center:
        y = y - np.mean(y)
        X = X.copy('F')     # faster in Fortran order
        X -= X.mean(axis=0)
    # compute the correlation
    corr = np.dot(y, X)
    # XXX could use corr /= row_norms(X.T) here, but the test doesn't pass
    corr /= np.asarray(np.sqrt((X ** 2).sum(axis=0))).ravel()
    corr /= norm(y)
    # convert to p-value
    degrees_of_freedom = y.size - (2 if center else 1)
    F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
    pv = stats.f.sf(F, 1, degrees_of_freedom)
    return F, pv
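A hedged sanity check: on dense data this should agree with scikit-learn's public f_regression. The stand-ins below (norm, check_arrays) replace helpers that no longer exist in modern releases and are assumptions of this sketch, not part of the original source.

import numpy as np
from scipy import stats
from sklearn.feature_selection import f_regression
from sklearn.utils import check_X_y

norm = np.linalg.norm                                     # stand-in for extmath.norm
check_arrays = lambda X, y, dtype: check_X_y(X, y, dtype=dtype)  # modern equivalent

rng = np.random.RandomState(0)
X = rng.rand(30, 4)
y = X[:, 0] + 0.1 * rng.randn(30)
F, pv = f_regression_nosparse(X, y)
F_ref, pv_ref = f_regression(X, y)
print(np.allclose(F, F_ref))    # expected: True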
Example 12: mean_shift
def mean_shift(X, bandwidth, seeds, kernel_update_function, max_iterations=10):
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth      # when the mean has converged
    cluster_centers = []
    ball_tree = BallTree(X)             # to efficiently look up nearby points

    # For each seed, climb the gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            # queries within 3x bandwidth (cf. Example 14's gaussian query distance)
            points_within = X[ball_tree.query_radius([weighted_mean], bandwidth * 3)[0]]
            old_mean = weighted_mean    # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                cluster_centers.append(weighted_mean)
                break
            completed_iterations += 1
    return cluster_centers
Example 13: _iter
def _iter(X,
          weighted_mean,
          kernel_update_function,
          bandwidth,
          ball_tree,
          stop_thresh,
          max_iter):
    """Return the cluster center and the points visited while iterating
    from the seed to the centroid. This code has been isolated so that it
    can be executed in parallel using joblib."""
    visited_points = set()
    completed_iterations = 0
    while True:
        within_idx = ball_tree.query_radius([weighted_mean], bandwidth * 3)[0]
        visited_points.update(within_idx)
        points_within = X[within_idx]
        old_mean = weighted_mean    # save the old mean
        weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
        converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
        if converged or completed_iterations == max_iter:
            return weighted_mean, visited_points
        completed_iterations += 1
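A hedged sketch of how a helper like _iter is typically fanned out with joblib, one task per seed; everything except _iter's own signature (X, seeds, kernel_update_function, bandwidth, ball_tree, stop_thresh, max_iter) is assumed to exist in the surrounding context.

from joblib import Parallel, delayed

results = Parallel(n_jobs=-1)(
    delayed(_iter)(X, seed, kernel_update_function, bandwidth,
                   ball_tree, stop_thresh, max_iter)
    for seed in seeds)
centers = [center for center, _ in results]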
Example 14: mean_shift
def mean_shift(X, bandwidth=None, seeds=None, kernel="flat",
               max_cluster_radius=-1., max_iterations=300):
    """Perform mean-shift clustering of data using the specified kernel.

    Parameters
    ----------
    X : array [n_samples, n_features]
        Input points to be clustered.
    bandwidth : float
        Kernel bandwidth.
    seeds : array [n_seeds, n_features], optional
        Points used as initial kernel locations.
        If not set, then use every point as a seed (which may be very
        slow; consider using the get_bin_seeds function to create a
        reduced set of seeds).
    max_cluster_radius : float, default -1.
        Used only in post-processing.
        If negative, then each point is clustered into its nearest cluster.
        If positive, then those points that are not within max_cluster_radius
        of any cluster center are said to be 'orphans' that do not belong to
        any cluster. Orphans are given cluster label -1.

    Returns
    -------
    cluster_centers : array [n_clusters, n_features]
        Coordinates of cluster centers.
    labels : array [n_samples]
        Cluster labels for each point.

    Notes
    -----
    See examples/plot_meanshift.py for an example.
    """
    # KERNELS, flat_kernel_update and gaussian_kernel_update are
    # module-level names defined elsewhere in the original source.
    if seeds is None:
        seeds = X
    elif len(seeds) == 0:
        raise ValueError("If a list of seeds is provided it cannot be empty.")
    if kernel not in KERNELS:
        valid_kernels = " ".join(KERNELS)
        raise ValueError("Kernel %s is not valid. Valid kernel choices are: %s"
                         % (kernel, valid_kernels))

    # Set the maximum neighbor query distance based on the kernel
    if kernel in ["flat"]:
        query_distance = bandwidth
        kernel_update_function = flat_kernel_update
        print("Using flat kernel update")
    elif kernel in ["gaussian"]:
        query_distance = bandwidth * 3  # A bit arbitrary
        kernel_update_function = gaussian_kernel_update
        print("Using gaussian kernel update")
    else:
        raise ValueError("Kernel %s not implemented correctly" % kernel)

    n_points, n_features = X.shape
    stop_thresh = 1e-3 * bandwidth      # when the mean has converged
    center_intensity_dict = {}
    ball_tree = BallTree(X)             # to efficiently look up nearby points

    # For each seed, climb the gradient until convergence or max_iterations
    for weighted_mean in seeds:
        completed_iterations = 0
        while True:
            # Find the mean of the points within bandwidth
            points_within = X[ball_tree.query_radius([weighted_mean], query_distance)[0]]
            if len(points_within) == 0:
                break   # Depending on the seeding strategy this condition may occur
            old_mean = weighted_mean    # save the old mean
            weighted_mean = kernel_update_function(old_mean, points_within, bandwidth)
            # If converged or at max_iterations, add the cluster
            if extmath.norm(weighted_mean - old_mean) < stop_thresh or \
                    completed_iterations == max_iterations:
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break
            completed_iterations += 1

    # POST PROCESSING: remove near-duplicate centers.
    # If the distance between two kernels is less than the bandwidth,
    # one of them is a duplicate and must be removed; keep the one
    # with more points.
    print("%d clusters before removing duplicates" % len(center_intensity_dict))
    sorted_by_intensity = sorted(center_intensity_dict.items(),
                                 key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=bool)
    cc_tree = BallTree(sorted_centers)
    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = cc_tree.query_radius([center], bandwidth)[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1   # leave the current point flagged as unique
    cluster_centers = sorted_centers[unique]
    # ... (the rest of this example is omitted in the source) ...
Example 15: variable_bw_mean_shift
def variable_bw_mean_shift(X, bandwidth_array, seeds=None, max_iterations=300):
    """Variable bandwidth mean shift with a gaussian kernel.

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Input data.
    bandwidth_array : array[float], shape=[n_samples]
        Kernel bandwidth per sample.
    seeds : array[float, float], shape=(n_seeds, n_features), optional
        Points used as initial kernel locations. Default is
        setting each point in the input data as a seed.
    max_iterations : int, default 300
        Maximum number of iterations per seed point before the clustering
        operation terminates (for that seed point), if it has not converged yet.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.
    labels : array, shape=[n_samples]
        Cluster labels for each point.

    Notes
    -----
    Code adapted from the scikit-learn library.
    """
    if seeds is None:
        seeds = X
    n_points, n_features = X.shape
    stop_thresh = 1e-3 * np.mean(bandwidth_array)   # when the mean has converged
    center_intensity_dict = {}
    cluster_centers = []
    ball_tree = BallTree(X)     # to efficiently look up nearby points

    def gaussian_kernel(x, points, bandwidth):
        distances = euclidean_distances(points, [x])
        weights = np.exp(-1 * (distances ** 2 / bandwidth ** 2))
        return np.sum(points * weights, axis=0) / np.sum(weights)

    # For each seed, climb the gradient until convergence or max_iterations
    for i, weighted_mean in enumerate(seeds):
        completed_iterations = 0
        while True:
            points_within = X[ball_tree.query_radius([weighted_mean], bandwidth_array[i])[0]]
            old_mean = weighted_mean    # save the old mean
            weighted_mean = gaussian_kernel(old_mean, points_within, bandwidth_array[i])
            converged = extmath.norm(weighted_mean - old_mean) < stop_thresh
            if converged or completed_iterations == max_iterations:
                if completed_iterations == max_iterations:
                    print("reached max iterations")
                cluster_centers.append(weighted_mean)
                center_intensity_dict[tuple(weighted_mean)] = len(points_within)
                break
            completed_iterations += 1

    # POST PROCESSING: remove near-duplicate centers.
    # If the distance between two kernels is less than the bandwidth,
    # one of them is a duplicate and must be removed; keep the one
    # with more points.
    sorted_by_intensity = sorted(center_intensity_dict.items(),
                                 key=lambda tup: tup[1], reverse=True)
    sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
    unique = np.ones(len(sorted_centers), dtype=bool)
    ball_tree = BallTree(sorted_centers)
    for i, center in enumerate(sorted_centers):
        if unique[i]:
            neighbor_idxs = ball_tree.query_radius([center], np.mean(bandwidth_array))[0]
            unique[neighbor_idxs] = 0
            unique[i] = 1   # leave the current point flagged as unique
    cluster_centers = sorted_centers[unique]

    # ASSIGN LABELS: a point belongs to the cluster it is closest to
    nbrs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(cluster_centers)
    distances, idxs = nbrs.kneighbors(X)
    labels = idxs.flatten()
    return cluster_centers, labels
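A hedged usage sketch for Example 15, spelling out the module-level imports the function assumes; extmath.norm is gone from recent scikit-learn, so a namespace stand-in is used. The two-cluster outcome is the expected result for this data, not a guarantee.

import numpy as np
from types import SimpleNamespace
from sklearn.neighbors import BallTree, NearestNeighbors
from sklearn.metrics.pairwise import euclidean_distances

extmath = SimpleNamespace(norm=np.linalg.norm)   # stand-in for the removed helper

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 5])
bandwidths = np.full(len(X), 2.0)    # constant bandwidths, for simplicity
centers, labels = variable_bw_mean_shift(X, bandwidths)
print(len(centers), np.unique(labels))   # expected: 2 clusters, labels {0, 1}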