This article collects typical usage examples of the sklearn.svm.OneClassSVM.decision_function method in Python. If you are asking what OneClassSVM.decision_function does, how to call it, or what it looks like in practice, the curated code samples below may help. You can also browse further usage examples of its containing class, sklearn.svm.OneClassSVM.
The following shows 11 code examples of OneClassSVM.decision_function, ordered by popularity by default.
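Before the examples, here is a minimal, self-contained sketch of the basic fit/score pattern they all build on; the toy data and parameter values are illustrative assumptions, not taken from any example below.

import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.RandomState(0)
X_normal = rng.randn(100, 2)                   # bulk of "normal" points
X_strange = rng.uniform(-6, 6, size=(10, 2))   # scattered candidate outliers

clf = OneClassSVM(nu=0.1, kernel='rbf', gamma=0.1)
clf.fit(X_normal)

# decision_function returns a signed score relative to the learned boundary:
# positive for points inside the normal region, negative for points outside.
print(clf.decision_function(X_normal).mean())    # typically positive
print(clf.decision_function(X_strange).mean())   # typically negative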
Example 1: runClassifier
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def runClassifier(self, _driverId, numComponents=0):
    X = list(self.featuresHash.values())
    self.ids = list(self.featuresHash.keys())
    if self.runDimRed:
        X = self.dimRed(X, numComponents)
    # OCSVM is an alias for sklearn.svm.OneClassSVM
    clf = OCSVM(nu=self.nu, gamma=self.gamma)
    clf.fit(X)
    y_pred = clf.decision_function(X).ravel()
    threshold = stats.scoreatpercentile(y_pred, 100 * self.outliers_fraction)
    # mark scores above the percentile threshold as inliers (1)
    self.label = y_pred > threshold
    self.label = list(map(int, self.label))
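A side note on the thresholding step above: it keeps everything above the outliers_fraction percentile of the scores. A dependency-light equivalent using numpy alone (the scores and fraction here are made up for illustration):

import numpy as np

y_pred = np.array([0.5, 0.2, -0.3, 0.8, -0.9, 0.1])   # illustrative scores
outliers_fraction = 0.1                                # assumed value
threshold = np.percentile(y_pred, 100 * outliers_fraction)
labels = (y_pred > threshold).astype(int)              # 1 = inlier, 0 = outlier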
Example 2: embed_dat_matrix_two_dimensions
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def embed_dat_matrix_two_dimensions(low_dimension_data_matrix,
                                    y=None,
                                    labels=None,
                                    density_colormap='Blues',
                                    instance_colormap='YlOrRd'):
    from sklearn.preprocessing import scale
    low_dimension_data_matrix = scale(low_dimension_data_matrix)
    # make mesh
    x_min, x_max = low_dimension_data_matrix[:, 0].min(), low_dimension_data_matrix[:, 0].max()
    y_min, y_max = low_dimension_data_matrix[:, 1].min(), low_dimension_data_matrix[:, 1].max()
    step_num = 50
    h = min((x_max - x_min) / step_num, (y_max - y_min) / step_num)  # step size in the mesh
    b = h * 10  # border size
    x_min, x_max = low_dimension_data_matrix[:, 0].min() - b, low_dimension_data_matrix[:, 0].max() + b
    y_min, y_max = low_dimension_data_matrix[:, 1].min() - b, low_dimension_data_matrix[:, 1].max() + b
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # induce a one-class model to estimate densities
    from sklearn.svm import OneClassSVM
    gamma = max(x_max - x_min, y_max - y_min)
    clf = OneClassSVM(gamma=gamma, nu=0.1)
    clf.fit(low_dimension_data_matrix)
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    if hasattr(clf, "decision_function"):
        score_matrix = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    else:
        score_matrix = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    # Put the result into a color plot
    levels = np.linspace(min(score_matrix), max(score_matrix), 40)
    score_matrix = score_matrix.reshape(xx.shape)
    if y is None:
        y = 'white'
    plt.contourf(xx, yy, score_matrix, cmap=plt.get_cmap(density_colormap), alpha=0.9, levels=levels)
    plt.scatter(low_dimension_data_matrix[:, 0], low_dimension_data_matrix[:, 1],
                alpha=.5,
                s=70,
                edgecolors='gray',
                c=y,
                cmap=plt.get_cmap(instance_colormap))
    # labels
    if labels is not None:
        for id in range(low_dimension_data_matrix.shape[0]):
            label = labels[id]
            x = low_dimension_data_matrix[id, 0]
            y = low_dimension_data_matrix[id, 1]
            plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')
Example 3: find_anomaly
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def find_anomaly(label1, label2, winsize):
    print("Find anomaly in channel", label1 + '-' + label2 + '...', file=sys.stderr)
    print("-" * 80)
    print("Channel [" + label1 + '-' + label2 + ']')
    print("-" * 80)
    # take the difference between the two electrodes
    electrode1 = eeg.chan_lab.index(label1)
    electrode2 = eeg.chan_lab.index(label2)
    wave = eeg.X[electrode1] - eeg.X[electrode2]
    print("Splitting into windows...", file=sys.stderr)
    wave_windows = np.array_split(wave, int(len(wave) / eeg.sample_rate / winsize))
    print("Extracting features...", file=sys.stderr)

    def extract_features(wave_window):
        max_val = max(wave_window)
        min_val = min(wave_window)
        stdev = np.std(wave_window)
        sum_val = sum(wave_window)
        sum_pos_val = sum(x for x in wave_window if x > 0)
        sum_abs_val = sum(abs(x) for x in wave_window)
        return [max_val, min_val, stdev, sum_val, sum_pos_val, sum_abs_val]

    Examples = np.array(list(map(extract_features, wave_windows)))
    print("Training model, assuming no more than", CONTAMINATION, "anomaly...", file=sys.stderr)
    od = OneClassSVM(nu=CONTAMINATION, kernel='poly', gamma=0.05, max_iter=100000)
    od.fit(Examples)
    decisions = od.decision_function(Examples).ravel()
    print("Most likely windows with anomaly:")
    # rank windows by absolute score, in descending order
    largest_indices = np.argsort((-np.absolute(decisions)).ravel())[:20]
    for large_index in largest_indices:
        print(large_index * winsize / 60, "min (score:", decisions[large_index], ")")
    sys.stdout.flush()
Example 4: remove_outliers_SVM
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def remove_outliers_SVM(self):
    ## Remove outliers using a OneClassSVM method
    print("Running SVM to remove outliers...")
    svm = OneClassSVM(kernel='rbf', nu=0.1, degree=3, verbose=1)
    fit = svm.fit(self.DataArray)
    decision = svm.decision_function(self.DataArray)
    _indices = []
    # If a value is below the decision hyperplane, eliminate it
    for i in range(len(decision)):
        if decision[i] < 0:
            _indices.append(i)
    print(self.DataArray.shape)
    self.DataArray = np.delete(self.DataArray, _indices, axis=0)
    self.TargetArray = np.delete(self.TargetArray, _indices, axis=0)
    print(self.DataArray.shape)
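A note on the filtering above: OneClassSVM.predict already encodes the sign of the decision function as +1/-1, so the index-collecting loop can be replaced by a boolean mask. A sketch of an equivalent vectorized version, under the assumption that data and target are NumPy arrays like DataArray and TargetArray above:

import numpy as np
from sklearn.svm import OneClassSVM

def remove_outliers_svm_vectorized(data, target):
    # predict returns +1 for inliers and -1 for outliers, which matches the
    # sign of decision_function (up to points exactly on the boundary).
    svm = OneClassSVM(kernel='rbf', nu=0.1)
    mask = svm.fit(data).predict(data) == 1
    return data[mask], target[mask]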
Example 5: decision_function
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def decision_function(self, data):
    # flip the sign so that larger values mean "more anomalous"
    return -OneClassSVM.decision_function(self, data)
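Example 5 is a single method lifted from a larger class, presumably a wrapper that subclasses OneClassSVM so that higher scores mean more anomalous. A minimal sketch of what such a wrapper might look like (the class name here is an assumption):

from sklearn.svm import OneClassSVM

class NegatedOneClassSVM(OneClassSVM):
    """OneClassSVM whose scores grow with abnormality rather than normality."""

    def decision_function(self, data):
        # Negate the parent's score: OneClassSVM assigns larger values to
        # inliers, so the negation ranks outliers first.
        return -OneClassSVM.decision_function(self, data)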
Example 6: predict_header_features
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def predict_header_features(self, pkt_featurizer):
    group_id = pkt_featurizer.pkt_type
    features = pkt_featurizer.features
    arrival_time = pkt_featurizer.arrival_time
    try:
        vectorizer = DictVectorizer()
        vectorizer.fit(self.training_data[group_id])
        training_data_vectorized = vectorizer.transform(self.training_data[group_id])
        features_vectorized = vectorizer.transform(features)
        scaler = preprocessing.StandardScaler(with_mean=False)
        training_data_vectorized = scaler.fit_transform(training_data_vectorized)
        features_vectorized = scaler.transform(features_vectorized)
        classifier = OneClassSVM()
        classifier.fit(training_data_vectorized)
        result = classifier.predict(features_vectorized)
        distance = classifier.decision_function(features_vectorized)
    except KeyError:
        result = 0
        distance = 0
    return result, distance
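The vectorize/scale/fit chain in Example 6 can also be expressed with scikit-learn's Pipeline, which keeps the three steps consistent between training and prediction. A sketch under the assumption that the training data is a list of feature dicts, as DictVectorizer expects:

from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM

def build_header_model(training_dicts):
    # DictVectorizer emits sparse matrices, so center with with_mean=False.
    model = make_pipeline(
        DictVectorizer(),
        StandardScaler(with_mean=False),
        OneClassSVM(),
    )
    return model.fit(training_dicts)

# usage sketch:
# model = build_header_model(training_data[group_id])
# distance = model.decision_function([features])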
Example 7: print
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
unif = np.random.uniform(lim_inf, lim_sup,
                         size=(n_generated, n_features))
# fit:
print('IsolationForest processing...')
iforest = IsolationForest()
iforest.fit(X_train)
s_X_iforest = iforest.decision_function(X_test)
print('LocalOutlierFactor processing...')
# novelty=True is required for LocalOutlierFactor to expose decision_function
lof = LocalOutlierFactor(n_neighbors=20, novelty=True)
lof.fit(X_train)
s_X_lof = lof.decision_function(X_test)
print('OneClassSVM processing...')
ocsvm = OneClassSVM()
ocsvm.fit(X_train[:min(ocsvm_max_train, n_samples_train - 1)])
s_X_ocsvm = ocsvm.decision_function(X_test).ravel()
s_unif_iforest = iforest.decision_function(unif)
s_unif_lof = lof.decision_function(unif)
s_unif_ocsvm = ocsvm.decision_function(unif).ravel()
plt.subplot(121)
auc_iforest, em_iforest, amax_iforest = em(t, t_max,
                                           volume_support,
                                           s_unif_iforest,
                                           s_X_iforest, n_generated)
auc_lof, em_lof, amax_lof = em(t, t_max, volume_support,
                               s_unif_lof, s_X_lof, n_generated)
auc_ocsvm, em_ocsvm, amax_ocsvm = em(t, t_max, volume_support,
                                     s_unif_ocsvm, s_X_ocsvm,
                                     n_generated)
Example 8: print
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
X_test = X[n_samples_train:, :]
y_train = y[:n_samples_train]
y_test = y[n_samples_train:]
# optionally train only on normal data:
# X_train = X_train[y_train == 0]
# y_train = y_train[y_train == 0]
print('OneClassSVM processing...')
model = OneClassSVM(cache_size=500)
tstart = time()
model.fit(X_train)
fit_time += time() - tstart
tstart = time()
scoring = -model.decision_function(X_test)  # the lower, the more normal
predict_time += time() - tstart
fpr_, tpr_, thresholds_ = roc_curve(y_test, scoring)
if fit_time + predict_time > max_time:
    raise TimeoutError
f = interp1d(fpr_, tpr_)
tpr += f(x_axis)
tpr[0] = 0.
precision_, recall_ = precision_recall_curve(y_test, scoring)[:2]
# older versions of scipy need sorted x input for interp1d
arg_sorted = recall_.argsort()
recall_ = recall_[arg_sorted]
Example 9: main
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def main():
    usage = "refine2d using simmx information"
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)
    parser.add_argument("--ptcls", type=str, help="particle file", default=None)
    parser.add_argument("--simmx", type=str, help="simmx", default=None)
    parser.add_argument("--npca", type=int, help="number of pca factors", default=10)
    parser.add_argument("--niter", type=int, help="number of iterations", default=5)
    parser.add_argument("--outlier", type=float, help="outlier fraction", default=0.1)
    parser.add_argument("--ncls", type=int, help="number of centers", default=128)
    parser.add_argument("--nref", type=int, help="number of references", default=32)
    (options, args) = parser.parse_args()
    logid = E2init(sys.argv)
    simmxfile = options.simmx
    for itr in range(options.niter):
        ### start from the simmx
        print("Pre-processing simmx")
        e = EMData(simmxfile)
        pts = e.numpy().T.copy()
        for i in range(len(pts)):
            pts[i] -= np.mean(pts[i])
            pts[i] /= np.std(pts[i])
        pts = pts.astype(float).copy()
        print("Doing PCA")
        (nptcl, ncls) = pts.shape
        pca = PCA(options.npca)
        pts_pca = pca.fit_transform(pts)
        bs = pts_pca
        bs /= np.std(bs)
        print(bs.shape, pts.shape)
        np.savetxt("test_pca_{:02d}".format(itr), pts_pca)
        print("Removing outliers")
        outliers_fraction = options.outlier
        svm = OneClassSVM(nu=0.95 * outliers_fraction + 0.05, kernel="rbf", gamma=0.1)
        svm.fit(bs)
        y_pred = svm.decision_function(bs).ravel()
        # keep the nkeep highest-scoring (most normal) particles
        nkeep = int(len(bs) * (1 - outliers_fraction))
        st = np.argsort(y_pred)[::-1]
        st = st[:nkeep]
        print("Clustering")
        ncnt = options.ncls
        centroids, _ = kmeans(bs[st], ncnt)
        l, _ = vq(bs[st], centroids)
        labels = np.zeros(len(bs)) - 1
        labels[st] = l
        print("Class averaging")
        e = EMData(1, len(labels))
        for i in range(len(labels)):
            e.set_value_at(0, i, labels[i])
        clsmxfile = "clsmx_{:02d}.hdf".format(itr)
        e.write_image(clsmxfile)
        clsout = "classes_{:02d}.hdf".format(itr)
        run("e2classaverage.py --input={} --classmx={} --output={} --force --center xform.center --iter=5 --align=rotate_translate_flip:maxshift=32 --averager=mean --keep=.6 --cmp=ccc --aligncmp=ccc --normproc=normalize --parallel=thread:12".format(options.ptcls, clsmxfile, clsout))
        simmxfile = "simmx_{:02d}.hdf".format(itr)
        run("e2simmx.py {} {} {} --align rotate_translate_flip --aligncmp ccc --cmp ccc --saveali --parallel thread:12".format(options.ptcls, clsout, simmxfile))
    E2end(logid)
Example 10: select_candidates
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
def select_candidates(X, h, objective_function, verbose=False,
                      cov_computation_method=empirical_covariance):
    """Finds the best pure subset of observations to compute MCD from it.

    The purpose of this function is to find the best sets of h
    observations with respect to a minimization of their covariance
    matrix determinant. Equivalently, it removes n_samples-h
    observations to construct what we call a pure data set (i.e. not
    containing outliers). The list of the observations of the pure
    data set is referred to as the `support`.

    Starting from a support estimated with a Parzen density estimator,
    the pure data set is found by the c_step procedure introduced by
    Rousseeuw and Van Driessen in [1].

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data (sub)set in which we look for the h purest observations.
    h : int, [(n + p + 1)/2] < h < n
        The number of samples the pure data set must contain.
    select : int, int > 0
        Number of best candidates results to return.

    See Also
    --------
    `c_step` function

    Returns
    -------
    best_locations : array-like, shape (select, n_features)
        The `select` location estimates computed from the `select` best
        supports found in the data set (`X`).
    best_covariances : array-like, shape (select, n_features, n_features)
        The `select` covariance estimates computed from the `select`
        best supports found in the data set (`X`).
    best_supports : array-like, shape (select, n_samples)
        The `select` best supports found in the data set (`X`).

    Notes
    -----
    References:
    [1] A Fast Algorithm for the Minimum Covariance Determinant Estimator,
        1999, American Statistical Association and the American Society
        for Quality, TECHNOMETRICS
    """
    n_samples, n_features = X.shape
    from sklearn.metrics.pairwise import euclidean_distances
    from sklearn.svm import OneClassSVM
    pairwise_distances = np.ravel(euclidean_distances(X))
    delta = sp.stats.scoreatpercentile(pairwise_distances, 10)
    gamma = 0.01 / delta
    clf = OneClassSVM(kernel='rbf', gamma=gamma)
    clf.fit(X)
    # select half of the samples as the initial support, ranked by score
    in_support = np.argsort(
        -np.ravel(clf.decision_function(X)))[-(n_samples // 2):]
    support = np.zeros(n_samples, dtype=bool)
    support[in_support] = True
    location = X[support].mean(0)
    covariance = cov_computation_method(X[support])
    initial_estimates = (location, covariance)
    best_location, best_covariance, _, best_support = c_step(
        X, h, objective_function, initial_estimates, verbose=verbose,
        cov_computation_method=cov_computation_method)
    return best_location, best_covariance, best_support
Example 11: OneClassSVM
# Required import: from sklearn.svm import OneClassSVM [as alias]
# Or: from sklearn.svm.OneClassSVM import decision_function [as alias]
import numpy as np
import pandas as pd
from sklearn.svm import OneClassSVM

df = pd.read_csv('kddcup_for_elki_100000.csv', header=None, index_col=False)
labelix = df.shape[1] - 1
labels = df[labelix]
df = df.drop(labelix, axis=1)
svm = OneClassSVM(kernel='rbf', gamma=1.0 / df.shape[0], tol=0.001, nu=0.5, shrinking=True, cache_size=80)
svm = svm.fit(df.values)
scores = svm.decision_function(df.values).flatten()
# invert the scores so that larger values mean "more outlying"
maxvalue = np.max(scores)
scores = maxvalue - scores
output = pd.DataFrame()
# perform reverse sort: most outlying rows first
sort_ix = np.argsort(scores)[::-1]
output['labels'] = labels.values[sort_ix]
output['outlier_scores'] = scores[sort_ix]
output.to_csv('outlier_scores.csv', header=False, index=False)
Author ID: JingqinGao, Project: unsupervised_anomaly_detection_sod_vs_one_class_svm, Lines: 28, Source file: one_class_svm_sklearn.py