本文整理汇总了Python中sklearn.svm.OneClassSVM类的典型用法代码示例。如果您正苦于以下问题：Python OneClassSVM类的具体用法？Python OneClassSVM怎么用？Python OneClassSVM使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了OneClassSVM类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: svm_model
class svm_model():
    """Thin wrapper that trains and queries a single OneClassSVM."""

    def train(self, X, ker):
        """Fit a fresh OneClassSVM on ``X``.

        Args:
            X: training samples, forwarded unchanged to ``OneClassSVM.fit``.
            ker: kernel specification, forwarded as ``kernel=``.
        """
        # ``random_state`` is deliberately not passed: OneClassSVM never used
        # it, and current scikit-learn releases reject the keyword outright.
        self.model = OneClassSVM(kernel=ker, shrinking=True)
        self.model.fit(X)

    def predict(self, X):
        """Return the trained model's predictions for ``X``.

        ``train`` must have been called first, otherwise ``self.model`` does
        not exist.
        """
        return self.model.predict(X)
示例2: main
def _five_number_summary(values):
    """Return [max, 75th percentile, median, 25th percentile, min] of values."""
    arr = np.asarray(values)
    return [arr.max(), np.percentile(arr, 75), np.median(arr),
            np.percentile(arr, 25), arr.min()]


def main():
    """Fit a OneClassSVM on box-plot summaries of random series and score new ones.

    1000 random integer series of varying length are each reduced to five
    box-plot statistics; a default OneClassSVM is fitted on those. Ten much
    longer series drawn from a wider value range are reduced the same way
    and passed to ``predict``.

    Returns:
        The array returned by ``model.predict`` for the ten test series.
    """
    n = 1000

    # The series have different lengths, so they stay in a plain list:
    # np.array() cannot stack ragged sequences into a rectangular array.
    data = [np.random.randint(0, 5000, size=np.random.randint(20, 150))
            for _ in range(n)]

    # Reduce every series to 5 dimensions (box-plot statistics).
    x = np.array([_five_number_summary(series) for series in data])

    # NOTE: the original snippet also carried a disabled alternative that
    # built an n x n matrix of pairwise DTW distances as the feature matrix.

    # Build the model on the summarised training data.
    model = OneClassSVM()
    model.fit(x)

    # Create the test dataset: longer series over a wider value range.
    test = [np.random.randint(0, 10000, size=np.random.randint(20000, 30000))
            for _ in range(10)]

    # Transform the test dataset exactly like the training data.
    x_test = np.array([_five_number_summary(series) for series in test])

    # Predict on the test dataset.
    return model.predict(x_test)
示例3: fit
def fit(self, X, Y, W):
    """Fit a OneClassSVM on ``X`` and wrap it in a OneClassSVMClassifier.

    Args:
        X: training samples.
        Y: not used by this method.
        W: optional per-sample weights, or ``None``. Flattened with
            ``reshape(-1)`` before being handed to the estimator.

    Returns:
        A ``OneClassSVMClassifier`` built from the fitted estimator.
    """
    clf = OneClassSVM(kernel=self.kernel, degree=self.degree,
                      gamma=self.gamma, coef0=self.coef0, tol=self.tol,
                      nu=self.nu, shrinking=self.shrinking,
                      cache_size=self.cache_size, max_iter=self.max_iter)
    if W is None:
        fitted = clf.fit(X)
    else:
        # Weights must be passed by keyword: the second positional parameter
        # of OneClassSVM.fit is the ignored ``y``, so a positional W would be
        # silently discarded.
        fitted = clf.fit(X, sample_weight=W.reshape(-1))
    return OneClassSVMClassifier(fitted)
示例4: Cluster
class Cluster(object):
    '''A named set of feature vectors with a PCA + OneClassSVM novelty model.'''

    def __init__(self, name):
        self.name = name
        self.raw_dataset = []  # raw sessions consumed by extract_features()
        self.dataset = []      # extracted feature vectors
        self.dataset_red = []  # PCA-reduced vectors (ndarray after reduce_dim)

    def get_featurevec(self, data):
        '''Takes in data in the form of an array of EmoPackets, and outputs
        a list of feature vectors (one per staggered bin).'''
        stride = int(dsp.SAMPLE_RATE * dsp.STAGGER)
        size = int(dsp.BIN_SIZE * dsp.SAMPLE_RATE)
        # Floor division keeps the bin count an integer on Python 3 as well;
        # a float here would make range() raise.
        num_bins = len(data) // stride - int(dsp.BIN_SIZE / dsp.STAGGER) + 1
        return [dsp.get_features(data[i * stride:i * stride + size])
                for i in range(num_bins)]

    def add_data(self, raw):
        '''Extracts feature vectors from raw and appends them to the dataset.
        Expects a list of EmoPackets. Does not retrain; call reduce_dim()
        and train() afterwards.'''
        self.dataset.extend(self.get_featurevec(raw))

    def extract_features(self):
        '''Does feature extraction for all of the datasets in raw_dataset,
        replacing the current contents of dataset.'''
        self.dataset = []
        for sess in self.raw_dataset:
            self.dataset.extend(self.get_featurevec(sess))

    def reduce_dim(self, NDIM=5):
        '''Reduces the dimension of the extracted feature vectors.'''
        X = np.array(self.dataset)
        # NOTE(review): RandomizedPCA is absent from newer scikit-learn
        # releases (PCA(svd_solver='randomized') replaces it) - confirm the
        # targeted version before changing the import.
        self.pca = RandomizedPCA(n_components=NDIM).fit(X)
        self.dataset_red = self.pca.transform(X)

    def train(self):
        '''Trains the classifier on the reduced dataset.'''
        self.svm = OneClassSVM()
        self.svm.fit(self.dataset_red)

    def is_novel(self, pt):
        '''Says whether or not the bin is novel. Expects an array of EmoPackets.

        Only the first feature vector extracted from pt is scored. The reduced
        point is then added to the training set and the classifier retrained.
        Returns the classifier's prediction array for that single point.'''
        # The transformers expect a 2-D (n_samples, n_features) input.
        features = np.asarray(self.get_featurevec(pt)[0]).reshape(1, -1)
        X = self.pca.transform(features)
        ans = self.svm.predict(X)
        # dataset_red is an ndarray after reduce_dim(), so it has no append().
        self.dataset_red = np.vstack([self.dataset_red, X])
        self.train()
        return ans

    def save(self):
        '''Saves this classifier to a data directory next to this module.'''
        this_dir, _ = os.path.split(__file__)
        DATA_PATH = os.path.join(this_dir, "data", self.name + '.pkl')
        # The context manager closes the file even if pickling fails.
        with open(DATA_PATH, "wb") as dumpfile:
            pickle.dump(self, dumpfile, pickle.HIGHEST_PROTOCOL)
示例5: determine_test_similarity
def determine_test_similarity(self, model):
    """Fit one RBF OneClassSVM per entry of ``model``.

    Args:
        model: container indexable by ``0 .. len(model) - 1``; each entry is
            the training data for one estimator.

    Returns:
        A pair of dicts keyed by that index: the estimators, and the values
        returned by their ``fit`` calls.
    """
    classifiers = {}
    fitted_models = {}
    for idx in range(len(model)):
        estimator = OneClassSVM(kernel='rbf', nu=0.1, gamma=.023)
        classifiers[idx] = estimator
        fitted_models[idx] = estimator.fit(model[idx])
    return classifiers, fitted_models
示例6: select_best_support_vectors
def select_best_support_vectors(data, nu=0.01, all_gammas=2.0 ** np.arange(-10, 10, 1)):
    """Pick the gamma whose outlier and support-vector fractions best match nu.

    For every candidate gamma a OneClassSVM is fitted on ``data`` and scored
    by the sum of two squared deviations from ``nu``: the fraction of
    training samples predicted as -1, and the fraction of samples kept as
    support vectors.

    Args:
        data: training samples.
        nu: target fraction, also passed to OneClassSVM as ``nu``.
        all_gammas: candidate gamma values. The default is a float grid
            2**-10 .. 2**9; a float base is required because NumPy refuses
            to raise integers to negative integer powers.

    Returns:
        Tuple ``(best_gamma, all_errors)`` where ``all_errors`` lists the
        error for every candidate, in order.
    """
    n_samples = len(data)
    all_errors = []
    for gamma in all_gammas:
        clf = OneClassSVM(nu=nu, gamma=gamma)
        clf.fit(data)
        outlier_fraction = float(np.sum(clf.predict(data) == -1)) / n_samples
        sv_fraction = float(len(clf.support_vectors_)) / n_samples
        all_errors.append((outlier_fraction - nu) ** 2 + (sv_fraction - nu) ** 2)
    return all_gammas[np.argmin(all_errors)], all_errors
示例7: embed_dat_matrix_two_dimensions
def embed_dat_matrix_two_dimensions(low_dimension_data_matrix,
                                    y=None,
                                    labels=None,
                                    density_colormap='Blues',
                                    instance_colormap='YlOrRd'):
    """Draw a 2-D scatter of the instances over a OneClassSVM score map.

    The data is standardised with ``scale``, a OneClassSVM is fitted on it,
    and its scores on a regular mesh are drawn as filled contours beneath
    the scattered instances.

    Args:
        low_dimension_data_matrix: array whose first two columns are plotted.
        y: colours for the instances; ``'white'`` when ``None``.
        labels: optional per-row annotations, indexed by row number.
        density_colormap: colormap name for the contour plot.
        instance_colormap: colormap name for the scatter plot.
    """
    from sklearn.preprocessing import scale
    from sklearn.svm import OneClassSVM

    points = scale(low_dimension_data_matrix)
    first_col = points[:, 0]
    second_col = points[:, 1]

    # Build the mesh: step from the tighter axis, padded by ten steps.
    step_num = 50
    raw_x_lo, raw_x_hi = first_col.min(), first_col.max()
    raw_y_lo, raw_y_hi = second_col.min(), second_col.max()
    h = min((raw_x_hi - raw_x_lo) / step_num, (raw_y_hi - raw_y_lo) / step_num)
    b = h * 10
    x_min, x_max = raw_x_lo - b, raw_x_hi + b
    y_min, y_max = raw_y_lo - b, raw_y_hi + b
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Induce a one-class model to estimate densities; gamma is the larger
    # padded extent of the mesh.
    clf = OneClassSVM(gamma=max(x_max - x_min, y_max - y_min), nu=0.1)
    clf.fit(points)

    # Score every mesh point.
    mesh_points = np.c_[xx.ravel(), yy.ravel()]
    if hasattr(clf, "decision_function"):
        score_matrix = clf.decision_function(mesh_points)
    else:
        score_matrix = clf.predict_proba(mesh_points)[:, 1]

    # Contour levels are taken from the flat scores before reshaping.
    levels = np.linspace(min(score_matrix), max(score_matrix), 40)
    score_matrix = score_matrix.reshape(xx.shape)

    point_colors = 'white' if y is None else y
    plt.contourf(xx, yy, score_matrix, cmap=plt.get_cmap(density_colormap), alpha=0.9, levels=levels)
    plt.scatter(first_col, second_col,
                alpha=.5,
                s=70,
                edgecolors='gray',
                c=point_colors,
                cmap=plt.get_cmap(instance_colormap))

    # Annotate every instance with its label.
    if labels is not None:
        for row in range(points.shape[0]):
            plt.annotate(labels[row], xy=(points[row, 0], points[row, 1]),
                         xytext=(0, 0), textcoords='offset points')
示例8: svm
def svm(data, fraction=0.05, kernel='poly', degree=3, gamma=0, coeff=0):
    """Return the 1-based row numbers a OneClassSVM flags as anomalous.

    Args:
        data: samples to fit on and then score.
        fraction: passed to OneClassSVM as ``nu``.
        kernel: kernel name.
        degree: polynomial degree.
        gamma: kernel coefficient; ``0`` keeps its historical meaning of
            "1 / n_features" and is translated to ``'auto'``.
        coeff: passed to OneClassSVM as ``coef0``.

    Returns:
        Integer array of shape (k, 1) with the 1-based positions of the
        samples whose prediction is not +1.
    """
    # Newer scikit-learn rejects gamma=0; 'auto' is the equivalent spelling.
    effective_gamma = 'auto' if gamma == 0 else gamma
    model = OneClassSVM(kernel=kernel, degree=degree, gamma=effective_gamma,
                        nu=fraction, coef0=coeff)
    model.fit(data)
    predictions = np.ravel(model.predict(data))

    # 1-based numbering as a column vector; dropping the rows predicted +1
    # (inliers) leaves only the anomalies.
    numeration = np.arange(1, len(data) + 1).reshape(-1, 1)
    return numeration[predictions != 1]
示例9: outlier_detect
def outlier_detect(data_frame):
    """Return the ``hourly`` values of the rows a OneClassSVM marks as outliers.

    Args:
        data_frame: pandas DataFrame providing the five count columns listed
            below plus an ``hourly`` column.

    Returns:
        List of ``hourly`` entries for the rows predicted as -1.
    """
    # pandas to numpy - digestible by scikit
    columns = ['blm_tag_count', 'protest_count', 'justice_count', 'riot_count', 'breathe_count']
    features = data_frame[columns].values

    clf = OneClassSVM(nu=0.008, gamma=0.05)
    clf.fit(features)

    # Plain boolean array: wrapping it in a list would make NumPy treat it as
    # a 2-D index into the 1-D ``hourly`` array.
    is_outlier = clf.predict(features) == -1
    hours = np.asarray(data_frame.hourly)
    return list(hours[is_outlier])
示例10: select_best_outlier_fraction_cross_val
def select_best_outlier_fraction_cross_val(data, nu=0.05, all_gammas=2.0 ** np.arange(-10, 10, 50), folds_count=7):
    """Cross-validate candidate gammas and return the index of the best one.

    For every gamma, a OneClassSVM is fitted on each training fold. The fold
    error is the squared deviation from ``nu`` of the held-out outlier
    fraction plus that of the support-vector count divided by ``len(data)``.
    Errors are averaged over the folds.

    Args:
        data: 2-D array of samples, indexed as ``data[rows, :]``.
        nu: target fraction, also passed to OneClassSVM as ``nu``.
        all_gammas: candidate gamma values. A float base is used because
            NumPy refuses integer-to-negative-integer powers.
            NOTE(review): a step of 50 yields the single value 2**-10; a
            step of 1 may have been intended - confirm with the author.
        folds_count: number of cross-validation folds.

    Returns:
        Tuple ``(best_index, all_errors)``: position in ``all_gammas`` of the
        lowest mean error, and the mean error for every candidate.
    """
    all_errors = []
    # NOTE(review): this is the legacy sklearn.cross_validation.KFold
    # signature; sklearn.model_selection.KFold takes n_splits and is used
    # through .split(data) - confirm which import this file relies on.
    kf_iterator = KFold(len(data), n_folds=folds_count)
    for gamma in all_gammas:
        error = 0
        for train, test in kf_iterator:
            train_data = data[train, :]
            test_data = data[test, :]
            clf = OneClassSVM(nu=nu, gamma=gamma)
            clf.fit(train_data)
            prediction = clf.predict(test_data)
            outlier_fraction = np.mean(prediction == -1)
            sv_fraction = float(clf.support_vectors_.shape[0]) / len(data)
            error += (nu - outlier_fraction) ** 2 + (sv_fraction - nu) ** 2
        all_errors.append(error / folds_count)
    # Compare across all candidates; the last scalar ``error`` alone would
    # always give index 0.
    best_index = np.argmin(all_errors)
    return int(best_index), all_errors
示例11: OneClassSVMDetector
class OneClassSVMDetector(BaseOutlier):
    """Outlier detector for a single series, backed by a OneClassSVM."""

    @staticmethod
    def get_attributes():
        """Return the tunable attributes with their default or allowed values."""
        return {
            "nu": 0.1,
            "kernel": ['rbf', 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
            "gamma": 0.1,
        }

    def __init__(self, nu=0.1, kernel='rbf', gamma=0.1):
        self.nu = nu
        self.kernel = kernel
        self.gamma = gamma

    @staticmethod
    def _as_column(values):
        """Return ``values`` as an (n, 1) ndarray.

        Going through ``np.asarray`` makes this work for pandas objects too,
        which do not offer ``reshape`` themselves.
        """
        return np.asarray(values).reshape(-1, 1)

    def fit(self, data=None):
        """Fit the one-class SVM on ``data`` and return ``self``.

        ``data`` is kept on the instance. When it is a pandas object, gaps
        are interpolated in place, which also modifies the caller's object.
        """
        self.data = data
        self.check_finite(data)
        if self._is_using_pandas(data):
            self.data.interpolate(inplace=True)
        self.clf = OneClassSVM(nu=self.nu, kernel=self.kernel, gamma=self.gamma)
        self.clf.fit(self._as_column(data))
        return self

    def predict(self, X_test):
        """Return a DataFrame of the points predicted as outliers.

        Positions predicted as -1 for ``X_test`` are looked up in the data
        passed to ``fit`` (via ``.index`` / ``.iloc``), so that data is
        presumably a pandas Series aligned with ``X_test``.

        Returns:
            DataFrame with columns ``timestamp`` and ``anoms``.
        """
        predicted = np.ravel(self.clf.predict(self._as_column(X_test)))
        # A flat integer array is a valid positional indexer for both
        # Index.__getitem__ and .iloc.
        outlier_idx = np.flatnonzero(predicted == -1)
        d = {
            'timestamp': self.data.index[outlier_idx],
            'anoms': self.data.iloc[outlier_idx]
        }
        anoms = pd.DataFrame(d)
        self.anomaly_idx = anoms.index
        self.anom_val = anoms['anoms']
        return anoms

    def fit_predict(self, data=None):
        """Fit on ``data`` and return its detected anomalies."""
        self.fit(data)
        return self.predict(data)

    def plot(self):
        """Plot the fitted data with detected anomalies in red; return the figure.

        ``predict`` must have run first so the anomaly attributes exist.
        """
        import matplotlib.pyplot as plt
        f, ax = plt.subplots(1, 1)
        ax.plot(self.data, 'b')
        ax.plot(self.anomaly_idx, self.anom_val, 'ro')
        ax.set_title('Detected Anomalies')
        ax.set_ylabel('Count')
        f.tight_layout()
        return f
示例12: find_anomaly
def find_anomaly(label1, label2, winsize):
    """Print the windows of a bipolar EEG channel that look most anomalous.

    The channel is the difference of the two named electrodes of the
    module-level ``eeg`` object. It is cut into windows, each window is
    summarised by six statistics, a polynomial OneClassSVM is fitted on the
    summaries, and the 20 windows with the largest absolute decision score
    are printed.

    Args:
        label1: label of the first electrode in ``eeg.chan_lab``.
        label2: label of the second electrode in ``eeg.chan_lab``.
        winsize: window length; the window count is
            ``len(wave) / eeg.sample_rate / winsize``.
    """
    print("Find anomaly in channel", label1 + '-' + label2 + '...', file=sys.stderr)
    print("-"*80)
    print("Channel [" + label1 + '-' + label2 + ']')
    print("-"*80)

    # find difference
    electrode1 = eeg.chan_lab.index(label1)
    electrode2 = eeg.chan_lab.index(label2)
    wave = np.asarray(eeg.X[electrode1] - eeg.X[electrode2])

    print("Splitting into windows...", file=sys.stderr)
    # array_split needs an integer section count; true division gives a
    # float. Recordings shorter than one window still yield one section.
    n_windows = max(1, int(len(wave) / eeg.sample_rate / winsize))
    wave_windows = np.array_split(wave, n_windows)

    print("Extracting features...", file=sys.stderr)

    def extract_features(wave_window):
        """Return [max, min, std, sum, sum of positives, sum of absolutes]."""
        window = np.asarray(wave_window)
        return [window.max(), window.min(), np.std(window), window.sum(),
                window[window > 0].sum(), np.abs(window).sum()]

    # A list (not a lazy map object) is needed to build a 2-D array.
    Examples = np.array([extract_features(window) for window in wave_windows])

    print("Training model, assuming no more than", CONTAMINATION, "anomaly...", file=sys.stderr)
    od = OneClassSVM(nu=CONTAMINATION, kernel='poly', gamma=0.05, max_iter=100000)
    od.fit(Examples)

    # Flatten so (n,) and (n, 1) score shapes are handled alike.
    decisions = np.ravel(od.decision_function(Examples))

    print("Most likely windows with anomaly:")
    # find most likely windows, in desc order of absolute score
    largest_indices = np.argsort(-np.absolute(decisions))[:20]
    for large_index in largest_indices:
        print(large_index*winsize/60, "min (score:", decisions[large_index], ")")
    sys.stdout.flush()
示例13: cross_validate
def cross_validate():
    """Train a OneClassSVM on TARGET == 0 rows and report hold-out metrics.

    Reads ``./data/train.csv``. As many randomly chosen TARGET == 0 rows as
    there are TARGET == 1 rows are held out; the test set is those rows plus
    all TARGET == 1 rows. Prints the confusion matrix and the accuracy.
    Intended for tinkering with the model.
    """
    # read data
    all_df = pd.read_csv('./data/train.csv', index_col='ID')

    # split data
    zeros_df = all_df[all_df.TARGET == 0]
    ones_df = all_df[all_df.TARGET == 1]
    num_ones = ones_df.shape[0]
    msk = np.random.permutation(len(zeros_df)) < num_ones
    train_df = zeros_df[~msk]
    test_df = pd.concat([zeros_df[msk], ones_df])

    train_X = np.array(train_df.drop('TARGET', axis=1))
    test_X = np.array(test_df.drop('TARGET', axis=1))
    test_Y = np.array(test_df.TARGET)  # true target values

    # init svm
    print('training svm')
    my_svm = OneClassSVM(verbose=True)
    my_svm.fit(train_X)

    # predict
    print('predicting')
    raw_predictions = my_svm.predict(test_X)
    # The model only saw TARGET == 0 rows, so an inlier (+1) corresponds to
    # class 0 and an outlier (-1) to class 1. Without this mapping the +/-1
    # outputs never line up with the 0/1 targets.
    predictions = np.where(raw_predictions == 1, 0, 1)

    conf_matrix = confusion_matrix(test_Y, predictions, labels=[0, 1])
    print('confusion matrix:')
    print(pd.DataFrame(conf_matrix, columns=[0, 1]))
    print('accuracy:')
    print(np.mean(test_Y.reshape(predictions.shape) == predictions))
示例14: remove_outliers_SVM
def remove_outliers_SVM(self):
    """Drop the rows a OneClassSVM scores below its decision boundary.

    Fits an RBF OneClassSVM on ``self.DataArray`` and removes every row with
    a negative decision value from both ``self.DataArray`` and
    ``self.TargetArray``. The data shape is printed before and after.
    """
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("Running SVM to remove outliers...")

    svm = OneClassSVM(kernel='rbf', nu=0.1, degree=3, verbose=1)
    svm.fit(self.DataArray)

    # Flatten so (n,) and (n, 1) score shapes are handled alike.
    decision = np.ravel(svm.decision_function(self.DataArray))

    # If a value is below the decision hyperplane, eliminate it
    outlier_rows = np.flatnonzero(decision < 0)

    print(self.DataArray.shape)
    self.DataArray = np.delete(self.DataArray, outlier_rows, axis=0)
    self.TargetArray = np.delete(self.TargetArray, outlier_rows, axis=0)
    print(self.DataArray.shape)
示例15: plot_scatter
def plot_scatter(X_dict, y_dict, col1, col2, max_error, max_filled_gap, insens,
                 f_colors=('yellow', 'red', 'blue'), nu=0.98, high=0.95):
    """Scatter two feature columns with pre-failure points and model boundaries.

    Points selected by ``get_mask_norm`` are drawn in light grey. For every
    plane with at least one failure, points selected by ``get_mask_pref`` are
    drawn in that plane's colour. A linear OneClassSVM and a
    QuantileBinarizer are fitted on the failing planes' data; the SVM
    boundary is drawn in red and the binarizer's upper thresholds in green.

    Args:
        X_dict: mapping plane -> DataFrame containing ``col1`` and ``col2``.
        y_dict: mapping plane -> target values (must support ``.sum()``).
        col1: column plotted on the x axis.
        col2: column plotted on the y axis.
        max_error: forwarded to ``get_mask_pref`` / ``get_mask_norm``.
        max_filled_gap: not used by this function.
        insens: forwarded to ``get_mask_norm``.
        f_colors: colours for the failing planes, reused cyclically when
            there are more planes than colours.
        nu: ``nu`` of the linear OneClassSVM.
        high: upper quantile of the QuantileBinarizer.
    """
    planes = sorted(X_dict.keys())
    planes_with_failures = sorted(key for key in X_dict if y_dict[key].sum() > 0)

    X_train = pd.concat({plane: X_dict[plane][[col1, col2]].dropna()
                         for plane in planes_with_failures})

    # Honour the nu / high arguments rather than repeating their defaults.
    ocsvm = OneClassSVM(kernel='linear', nu=nu)
    ocsvm.fit(X_train.values)
    qb = QuantileBinarizer(low=0.0, high=high, each_side=False)
    qb.fit(X_train)

    mask_norm = pd.concat(
        {plane: get_mask_norm(y_dict[plane], max_error, insens) for plane in planes},
        axis=0)

    plt.figure(figsize=(15, 15), dpi=100)
    plt.xlabel(col1)
    plt.ylabel(col2)

    all_X = pd.concat(X_dict)
    plt.scatter(all_X[col1].loc[mask_norm], all_X[col2].loc[mask_norm],
                c='lightgrey', zorder=1, s=6)

    for i, plane in enumerate(planes_with_failures):
        pref_mask = get_mask_pref(y_dict[plane], max_error)
        plt.scatter(X_dict[plane][col1].loc[pref_mask],
                    X_dict[plane][col2].loc[pref_mask],
                    c=f_colors[i % len(f_colors)], zorder=2, s=30)

    x_min, x_max, _, _ = plt.axis('tight')
    plt.axvline(qb._thresholds[col1]['high'], c='green')
    plt.axhline(qb._thresholds[col2]['high'], c='green')

    # For a linear kernel the decision function is w . x + intercept_, so the
    # boundary w0*x + w1*y + b = 0 gives y = -(b + w0*x) / w1.
    w0, w1 = ocsvm.coef_[0][0], ocsvm.coef_[0][1]
    b = ocsvm.intercept_[0]
    plt.plot([x_min, x_max],
             [-(b + w0 * x_min) / w1, -(b + w0 * x_max) / w1],
             c='red')