本文整理匯總了Python中sklearn.svm.OneClassSVM.predict方法的典型用法代碼示例。如果您正苦於以下問題:Python OneClassSVM.predict方法的具體用法?Python OneClassSVM.predict怎麽用?Python OneClassSVM.predict使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類sklearn.svm.OneClassSVM
的用法示例。
在下文中一共展示了OneClassSVM.predict方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: svm_model
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
class svm_model():
    """Thin wrapper around sklearn's one-class SVM.

    `train` fits the detector on a training matrix; `predict` returns the
    fitted model's +1 (inlier) / -1 (outlier) label per row.
    """

    def train(self, X, ker):
        """Fit a OneClassSVM with kernel `ker` on the rows of X."""
        clf = OneClassSVM(kernel=ker, shrinking=True, random_state=1)
        clf.fit(X)
        self.model = clf

    def predict(self, X):
        """Return the fitted model's inlier/outlier labels for X."""
        return self.model.predict(X)
示例2: main
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def main():
    """Demo: embed random-length integer sequences as five-number summaries
    (boxplot stats), fit a OneClassSVM on them, then predict on a fresh
    random test set."""
    n = 1000

    # Raw training data: n sequences of random length in [20, 150) with
    # values in [0, 5000).
    data = []
    for _ in range(n):
        length = np.random.randint(20, 150)
        data.append(np.array([np.random.randint(0, 5000) for _ in range(length)]))
    data = np.array(data)

    # making all the data into 5 dimensions
    # howto : boxplot
    x = []
    y = []
    for seq in data:
        s = sorted(seq)
        x.append([max(s), np.percentile(s, 75), np.median(s),
                  np.percentile(s, 25), min(s)])
        y.append(0)
    x = np.array(x)

    # Alternative embedding via pairwise DTW distances (disabled):
    # start = time.time()
    # data_i = 0
    # cnt = 1
    # x = np.zeros((n, n))
    # for i in data:
    #     data_j = data_i
    #     for j in data[cnt:]:
    #         dist = dtw(i, j, dist=lambda i, j: norm(i - j, ord=1))[0]
    #         x[data_i][data_j+1], x[data_j+1][data_i] = dist, dist
    #         data_j += 1
    #     cnt += 1
    #     data_i += 1
    # end = time.time()
    # print(end - start)

    # build model with x
    model = OneClassSVM()
    model.fit(x)

    # create test dataset: 10 much longer sequences with a wider value range
    test = []
    for _ in range(10):
        length = np.random.randint(20000, 30000)
        test.append(np.array([np.random.randint(0, 10000) for _ in range(length)]))
    test = np.array(test)

    # transform test dataset with the same five-number-summary embedding
    x = []
    y = []
    for seq in test:
        s = sorted(seq)
        x.append([max(s), np.percentile(s, 75), np.median(s),
                  np.percentile(s, 25), min(s)])
        y.append(0)
    x = np.array(x)

    # predict test dataset
    pred = model.predict(x)
'''
示例3: Cluster
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
class Cluster(object):
    """Per-stream novelty detector: extracts DSP feature vectors from
    EmoPacket data, reduces them with PCA, and flags novel bins with a
    one-class SVM. Instances are persisted with pickle via save()."""

    def __init__(self, name):
        self.name = name          # used as the pickle filename stem in save()
        self.raw_dataset = []     # raw EmoPacket sessions
        self.dataset = []         # extracted feature vectors
        self.dataset_red = []     # PCA-reduced feature vectors

    def get_featurevec(self, data):
        '''Takes in data in the form of an array of EmoPackets, and outputs
        a list of feature vectors.'''
        # Bug fix: use floor division so num_bins stays an int — plain "/"
        # produced a float under Python 3 and broke range() below.
        num_bins = (len(data) // int(dsp.SAMPLE_RATE * dsp.STAGGER) -
                    int(dsp.BIN_SIZE / dsp.STAGGER) + 1)
        size = int(dsp.BIN_SIZE * dsp.SAMPLE_RATE)
        starts = int(dsp.SAMPLE_RATE * dsp.STAGGER)
        points = []
        for i in range(num_bins):
            points.append(dsp.get_features(data[i * starts:i * starts + size]))
        return points

    def add_data(self, raw):
        '''Allows the addition of new data. Will retrain upon addition.
        Expects a list of EmoPackets.'''
        self.dataset.extend(self.get_featurevec(raw))

    def extract_features(self):
        '''Does feature extraction for all of the datasets.'''
        self.dataset = []
        for sess in self.raw_dataset:
            self.dataset.extend(self.get_featurevec(sess))

    def reduce_dim(self, NDIM=5):
        '''Reduces the dimension of the extracted feature vectors.'''
        X = np.array(self.dataset)
        self.pca = RandomizedPCA(n_components=NDIM).fit(X)
        self.dataset_red = self.pca.transform(X)

    def train(self):
        '''Trains the classifier.'''
        self.svm = OneClassSVM()
        self.svm.fit(self.dataset_red)

    def is_novel(self, pt):
        '''Says whether or not the bin is novel. Expects an array of EmoPackets'''
        # Bug fix: the original referenced an undefined name `data` here
        # (NameError at runtime); the parameter is `pt`.
        X = self.pca.transform(np.array(self.get_featurevec(pt)[0]))
        ans = self.svm.predict(X)
        # Bug fix: after reduce_dim(), dataset_red is an ndarray, which has
        # no .append(); stack the new row instead, then retrain on the
        # grown dataset.
        self.dataset_red = np.vstack([self.dataset_red, X])
        self.train()
        return ans

    def save(self):
        '''Saves this classifier to a data directory.'''
        this_dir, this_filename = os.path.split(__file__)
        DATA_PATH = os.path.join(this_dir, "data", self.name + '.pkl')
        dumpfile = open(DATA_PATH, "wb")
        pickle.dump(self, dumpfile, pickle.HIGHEST_PROTOCOL)
        dumpfile.close()
示例4: select_best_support_vectors
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def select_best_support_vectors(data, nu=0.01, all_gammas=2 ** np.arange(-10, 10, 1)):
    """Grid-search gamma for a OneClassSVM at fixed nu.

    For each candidate gamma the model is fit on `data` and scored by how
    far both the rejected-point fraction and the support-vector fraction
    deviate (squared) from the target nu.

    Returns a tuple (best_gamma, all_errors) where all_errors is the error
    per candidate in `all_gammas` order.
    """
    all_errors = []
    for gamma in all_gammas:
        model = OneClassSVM(nu=nu, gamma=gamma)
        model.fit(data)
        labels = model.predict(data)
        outlier_frac = float(np.sum(labels == -1)) / len(data)
        sv_frac = float(len(model.support_vectors_)) / len(data)
        all_errors.append((outlier_frac - nu) ** 2 + (sv_frac - nu) ** 2)
    best = np.argmin(all_errors)
    return all_gammas[best], all_errors
示例5: NoveltySeparator
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
class NoveltySeparator(BaseEstimator):
    """Two-regime regressor: a one-class SVM splits samples into inliers
    and outliers, and a separate Ridge model is trained and used for each
    regime."""

    def get_params(self, deep=True):
        return {}

    def fit(self, X, y):
        # lets treat users spending something in the rest of the month as outliers
        # (an inlier is a row whose residual y - X[:, 0] is below 0.1)
        inliers = y - X[:, 0]
        inliers = np.where(inliers < 0.1, True, False)

        self.detector = OneClassSVM(nu=0.05, cache_size=2000, verbose=True)
        # training only on inliers
        print("Training detector")
        self.detector.fit(X[inliers])

        # re-label the full training set with the fitted detector
        results = self.detector.predict(X).reshape(X.shape[0])
        # predicted
        inliers = results == 1
        outliers = results == -1

        print("Training estimators")
        self.est_inliers = Ridge(alpha=0.05)
        self.est_outliers = Ridge(alpha=0.05)
        self.est_inliers.fit(X[inliers], y[inliers])
        # Bug fix: the original fit est_inliers twice and never fit
        # est_outliers, so predict() crashed on outlier rows.
        self.est_outliers.fit(X[outliers], y[outliers])

    def predict(self, X):
        y = np.zeros(X.shape[0])
        # Bug fix: the original referenced the misspelled name `lables`
        # below (NameError at runtime).
        labels = self.detector.predict(X).reshape(X.shape[0])
        inliers = labels == 1
        outliers = labels == -1
        y[inliers] = self.est_inliers.predict(X[inliers])
        y[outliers] = self.est_outliers.predict(X[outliers])
        return y
示例6: slice_probability_space_selection
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def slice_probability_space_selection(data, nu=0.05, all_gammas=2 ** np.linspace(-10, 10, 50),
                                      rho=0.05, outlier_distribution = np.random.rand, folds_count=7):
    """Cross-validated gamma selection for a OneClassSVM.

    For each candidate gamma, averages over K folds a weighted metric of
    (a) the fraction of held-out real points rejected as outliers and
    (b) the fraction of synthetic outliers (drawn from
    `outlier_distribution`, recentered and scaled by the data spread)
    accepted as inliers. Returns (best_index, all_errors) where best_index
    indexes `all_gammas`.
    """
    folds = KFold(len(data), n_folds=folds_count)
    all_errors = []
    for gamma in all_gammas:
        total = 0.0
        for train_idx, test_idx in folds:
            clf = OneClassSVM(nu=nu, gamma=gamma)
            clf.fit(data[train_idx, :])
            # Held-out rejection rate, normalized by (1 + rho) and data size.
            rejected = np.mean(clf.predict(data[test_idx, :]) == -1)
            inlier_part = rejected / (1 + rho) / len(data)
            # Synthetic outliers: uniform noise recentered and spread to
            # 8 standard deviations of the data.
            synthetic = outlier_distribution(*data.shape) - 0.5
            synthetic *= 8 * np.std(data)
            outlier_part = np.mean(clf.predict(synthetic) == 1) * rho / (1 + rho) / len(synthetic)
            total += inlier_part + outlier_part
        all_errors.append(total / folds_count)
    index = np.argmin(all_errors)
    return int(index), all_errors
示例7: outlier_detect
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def outlier_detect(data_frame):
    """Return the `hourly` values of rows flagged as anomalies.

    Fits a OneClassSVM on five tweet-count columns of `data_frame` and
    returns a list of the `hourly` column values for rows the model
    predicts as outliers (-1).
    """
    #pandas to numpy - digestible by scikit
    columns = ['blm_tag_count','protest_count','justice_count','riot_count','breathe_count']
    features = data_frame[list(columns)].values
    clf = OneClassSVM(nu=0.008, gamma=0.05)
    clf.fit(features)
    y_pred = clf.predict(features)
    # Bug fix: the original wrapped the boolean array in a list
    # (`mask=[y_pred==-1]`), relying on deprecated NumPy list-as-index
    # behavior; index with the boolean array directly.
    mask = y_pred == -1
    oak_array = np.asarray(data_frame.hourly)
    protest_predict = oak_array[mask]
    protest_hours = list(protest_predict)
    return protest_hours
示例8: svm
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def svm(data, fraction=0.05, kernel='poly', degree=3, gamma=0, coeff=0):
    """Return the 1-based row numbers of `data` flagged as outliers by a
    OneClassSVM (shape (k, 1) column of indices).

    Bug fixes vs. the original:
    - OneClassSVM's keyword is `coef0`, not `coeff0` (the original raised
      TypeError on every call).
    - `np.hstack((numeration, score))` mixed a (n, 1) array with a (n,)
      array and failed; and the filtering loop compared the whole array
      (`if (y == 1)`) and reassigned the wrong variable with np.delete.
      Selecting the rows where the prediction is -1 implements the
      evident intent directly.
    """
    model = OneClassSVM(kernel=kernel, degree=degree, gamma=gamma, nu=fraction, coef0=coeff)
    model.fit(data)
    score = model.predict(data)  # +1 inlier, -1 outlier per row
    numeration = np.arange(1, len(data) + 1).reshape(-1, 1)
    anomalies = numeration[score == -1]
    return anomalies
示例9: select_best_outlier_fraction_cross_val
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def select_best_outlier_fraction_cross_val(data, nu=0.05, all_gammas=2 ** np.arange(-10, 10, 50), folds_count=7):
    """Pick the gamma whose cross-validated outlier fraction and
    support-vector fraction best match the target nu.

    Returns (best_index, all_errors) where best_index indexes `all_gammas`.

    NOTE(review): the default `np.arange(-10, 10, 50)` yields a single
    exponent (step larger than the range); `np.linspace(-10, 10, 50)` was
    probably intended (cf. slice_probability_space_selection) — left
    unchanged here to preserve the public default.
    """
    all_errors = []
    kf_iterator = KFold(len(data), n_folds=folds_count)
    for gamma in all_gammas:
        error = 0
        for train, test in kf_iterator:
            train_data = data[train,:]
            test_data = data[test,:]
            clf = OneClassSVM(nu=nu, gamma=gamma)
            clf.fit(train_data)
            prediction = clf.predict(test_data)
            outlier_fraction = np.mean(prediction == -1)
            error += (nu - outlier_fraction) ** 2 + (float(clf.support_vectors_.shape[0]) / len(data) - nu) ** 2
        all_errors.append(error / folds_count)
    # Bug fix: the original took np.argmin of the scalar `error` (always 0,
    # so the first gamma was always returned); minimize over the per-gamma
    # error list instead.
    best_index = np.argmin(all_errors)
    return int(best_index), all_errors
示例10: OneClassSVMDetector
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
class OneClassSVMDetector(BaseOutlier):
    """Outlier detector wrapping sklearn's OneClassSVM for 1-D series data.

    Fits on a single column (reshaped to (-1, 1)) and reports the points
    predicted as -1 (anomalies) against the training series' pandas index.
    """

    @staticmethod
    def get_attributes():
        # Default / selectable hyperparameters exposed to the caller.
        # NOTE(review): 'rbf' appears twice in the kernel choices.
        return {
            "nu":0.1,
            "kernel":['rbf','linear', 'poly', 'rbf', 'sigmoid', 'precomputed'],
            "gamma":0.1,
        }

    def __init__(self,nu=0.1,kernel='rbf',gamma=0.1):
        self.nu = nu
        self.kernel = kernel
        self.gamma = gamma

    def fit(self,data=None):
        """Fit a OneClassSVM on `data` and return self.

        `data` is kept on the instance for later index lookups in
        predict(); when it is a pandas object, missing values are
        interpolated in place before fitting.
        NOTE(review): data must support .reshape — presumably a pandas
        Series backed by a NumPy array; confirm against callers.
        """
        self.data = data
        self.check_finite(data)
        if(self._is_using_pandas(data)==True):
            self.data.interpolate(inplace=True)
        # self.datareshap = data.reshape(-1,1)
        self.clf = OneClassSVM(nu=self.nu, kernel=self.kernel, gamma=self.gamma)
        self.clf.fit(data.reshape(-1,1))
        # print "done"
        return self

    def predict(self, X_test):
        """Predict on X_test and return a DataFrame of detected anomalies.

        The returned frame has 'timestamp' (index labels of anomalous
        positions in the *training* series) and 'anoms' (their values).
        Also caches anomaly_idx / anom_val on the instance for plot().
        """
        y_pred_train = self.clf.predict(X_test.reshape(-1,1))
        outlier_idx = np.where(y_pred_train == -1)
        inlier_idx = np.where(y_pred_train == 1)
        d = {
            'timestamp': self.data.index[outlier_idx],
            'anoms': self.data.iloc[outlier_idx]
        }
        anoms = pd.DataFrame(d)
        self.anomaly_idx = anoms.index
        self.anom_val = anoms['anoms']
        return anoms

    def fit_predict(self, data=None):
        """Convenience: fit on `data`, then predict on the same data."""
        self.fit(data)
        return self.predict(data)

    def plot(self):
        """Plot the training series (blue) with detected anomalies marked
        as red dots; returns the matplotlib figure."""
        import matplotlib.pyplot as plt
        f, ax = plt.subplots(1, 1)
        ax.plot(self.data, 'b')
        ax.plot(self.anomaly_idx, self.anom_val, 'ro')
        ax.set_title('Detected Anomalies')
        ax.set_ylabel('Count')
        f.tight_layout()
        return f
示例11: cross_validate
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def cross_validate():
    """Tinkering harness: train a OneClassSVM on a balanced subset of
    TARGET==0 rows from ./data/train.csv, test on the held-out zeros plus
    all TARGET==1 rows, and print a confusion matrix and accuracy."""
    #for tinkering with the model
    #read data
    all_df = pd.read_csv('./data/train.csv',index_col = 'ID')
    #split data: hold out as many zeros as there are ones for testing
    zeros_df = all_df[all_df.TARGET == 0]
    ones_df = all_df[all_df.TARGET == 1]
    num_ones = ones_df.shape[0]
    msk = np.random.permutation(len(zeros_df)) < num_ones
    zeros_train_df = zeros_df[~msk]
    zeros_test_df = zeros_df[msk]
    ones_test_df = ones_df
    train_df = zeros_train_df
    test_df = pd.concat([zeros_test_df,ones_test_df])
    train_X = np.array(train_df.drop('TARGET', axis = 1))
    train_Y = np.array(train_df.TARGET)
    test_X = np.array(test_df.drop('TARGET',axis = 1))
    test_Y = np.array(test_df.TARGET) #true target values
    #init svm
    print('training svm')
    my_svm = OneClassSVM(verbose = True)
    my_svm.fit(train_X)
    #predict
    print('predicting')
    predictions = my_svm.predict(test_X)
    conf_matrix = confusion_matrix(test_Y,predictions)
    print('confusion matrix:')
    print(pd.DataFrame(conf_matrix,columns = [0,1]))
    print('accuracy:')
    # Bug fix: int/int truncated the accuracy to 0 under Python 2;
    # np.mean gives the intended float ratio on both Python 2 and 3.
    # (test_Y is 0/1 while predictions are +/-1, so this counts
    # "1 predicted as inlier" matches — preserved from the original.)
    print(np.mean(test_Y.reshape(predictions.shape) == predictions))
示例12: predict_header_features
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def predict_header_features(self, pkt_featurizer):
    """Score a packet's header features against its group's history.

    Vectorizes and scales the stored training dicts for the packet's
    group, fits a fresh OneClassSVM, and returns (result, distance):
    the +1/-1 prediction and the decision-function margin for this
    packet. Returns (0, 0) when the group has no training data.
    """
    group_id = pkt_featurizer.pkt_type
    features = pkt_featurizer.features
    arrival_time = pkt_featurizer.arrival_time
    try:
        vectorizer = DictVectorizer()
        vectorizer.fit(self.training_data[group_id])
        train_X = vectorizer.transform(self.training_data[group_id])
        test_X = vectorizer.transform(features)
        # with_mean=False keeps the sparse matrices sparse.
        scaler = preprocessing.StandardScaler(with_mean=False)
        train_X = scaler.fit_transform(train_X)
        test_X = scaler.transform(test_X)
        detector = OneClassSVM()
        detector.fit(train_X)
        result = detector.predict(test_X)
        distance = detector.decision_function(test_X)
    except KeyError:
        # Unknown group: no history to compare against.
        result = 0
        distance = 0
    return result, distance
示例13: TwoStage
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
class TwoStage(object):
    """Novelty-gated classifier: a one-class SVM decides whether a sample
    belongs to a known class; samples it rejects are labeled "zother"
    instead of the random forest's prediction. Both stages share one
    StandardScaler fit during fit()."""

    def __init__(self, *args, **kwargs):
        super(TwoStage, self).__init__(*args, **kwargs)
        # NU and GAMMA are module-level constants defined elsewhere.
        self._oneCls = OneClassSVM(nu=NU, gamma=GAMMA)
        self._clf = RandomForestClassifier(n_estimators=30)
        self._scaler = StandardScaler()

    def fit(self, data, labels):
        """Fit scaler, novelty detector, and classifier; returns self."""
        scaled = self._scaler.fit_transform(data)
        self._oneCls.fit(scaled)
        self._clf.fit(scaled, labels)
        return self

    def predict(self, data):
        """Return (predictions, classes); rejected samples get "zother"."""
        scaled = self._scaler.transform(data)
        known = self._oneCls.predict(scaled)
        predictions = self._clf.predict(scaled)
        predictions[known == -1] = "zother"
        classes = list(self._clf.classes_) + ["zother"]
        return predictions, classes
示例14: predict_pkt_length_features
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def predict_pkt_length_features(self, pkt_featurizer):
    """Flag an anomalous packet length for this packet's group.

    Runs two detectors over the group's historical packet lengths:
    DBSCAN on history + the new length (the new point labeled -1, i.e.
    noise, means anomalous), and a OneClassSVM fit on scaled history and
    applied to the new length. Only the DBSCAN verdict is returned; the
    SVM result is computed (and optionally plotted) but not returned.
    Returns 0 when the group has no history.
    NOTE: Python 2 syntax (`print e`).
    """
    group_id = pkt_featurizer.pkt_type
    try:
        dbscan = DBSCAN()
        # History plus the incoming packet's length, as a column vector.
        pkt_lengths = np.array(list(self.pkt_lengths[group_id])+[pkt_featurizer.len_bytes]).reshape(-1,1)
        labels = dbscan.fit_predict(pkt_lengths)
        # The incoming packet is the last row; DBSCAN label -1 = noise.
        dbscan_prediction = labels[-1] == -1
        if self.plot:
            self.plot_1d_dbscan(pkt_lengths, labels, range(len(pkt_lengths)), self.pkt_lengths_fig_dbscan,
                                "", "Pkt Length", "Pkt Length DBSCAN Clustering - Anomalous Pkts in Black")
        one_class_svm = OneClassSVM()
        scaler = preprocessing.StandardScaler()
        # Scale on history only, then transform the new packet's length
        # with the same scaler.
        pkt_lengths_scaled = scaler.fit_transform(np.array(self.pkt_lengths[group_id]).reshape(-1,1))
        features_scaled = scaler.transform(np.array(pkt_featurizer.len_bytes).reshape(1,-1))
        one_class_svm.fit(pkt_lengths_scaled)
        svm_prediction = one_class_svm.predict(features_scaled)
        if self.plot and len(pkt_lengths_scaled) > 2:
            self.plot_1d_svm(self.pkt_lengths[group_id], one_class_svm, range(len(self.pkt_lengths[group_id])), scaler, self.pkt_lengths_fig_svm,
                             "Pkt", "Pkt Length", "Pkt Length One Class SVM Classification")
    except (KeyError, IndexError) as e:
        # Unknown group or empty history: report "not anomalous".
        print e
        dbscan_prediction = 0
    return dbscan_prediction
示例15: predict_rate_features
# 需要導入模塊: from sklearn.svm import OneClassSVM [as 別名]
# 或者: from sklearn.svm.OneClassSVM import predict [as 別名]
def predict_rate_features(self, pkt_featurizer):
    """Flag anomalous packet timing for this packet's group.

    Computes three successive time deltas for the incoming packet —
    td1 (inter-arrival time), td2 (change in td1), td3 (change in td2) —
    and runs DBSCAN over each history+new-value series; the new value
    being labeled -1 (noise) marks it anomalous. OneClassSVM predictions
    on the same scaled deltas are computed (and optionally plotted) but
    not returned. Returns (db_predict1, db_predict2, db_predict3), or
    (0, 0, 0) when the group lacks sufficient history.
    NOTE: Python 2 syntax (`print e`).
    """
    group_id = pkt_featurizer.pkt_type
    features = pkt_featurizer.features
    arrival_time = pkt_featurizer.arrival_time
    try:
        # Need at least two third-order deltas of history to proceed.
        if len(self.time_delta3[group_id]) <= 1:
            raise ValueError
        td1 = arrival_time - self.time_data[group_id][-1]
        td2 = td1 - self.time_delta1[group_id][-1]
        td3 = td2 - self.time_delta2[group_id][-1]
        """
        if self.plot:
            self.t_fig.cla()
            self.prep_figure(self.t_fig, "Time", "Pkt", grid=True)
            self.t_fig.scatter(self.time_data[group_id], range(len(self.time_data[group_id])))
        """
        dbscan1 = DBSCAN()
        dbscan2 = DBSCAN()
        dbscan3 = DBSCAN()
        # Each training set is the stored history plus the new delta,
        # reshaped to a column vector for sklearn.
        td1_training = np.array(list(self.time_delta1[group_id]) + [td1]).reshape(-1,1)
        td2_training = np.array(list(self.time_delta2[group_id]) + [td2]).reshape(-1,1)
        td3_training = np.array(list(self.time_delta3[group_id]) + [td3]).reshape(-1,1)
        labels1 = dbscan1.fit_predict(td1_training)
        labels2 = dbscan2.fit_predict(td2_training)
        labels3 = dbscan3.fit_predict(td3_training)
        # The new delta is the last row; DBSCAN label -1 = noise/anomaly.
        db_predict1 = labels1[-1] == -1
        db_predict2 = labels2[-1] == -1
        db_predict3 = labels3[-1] == -1
        if self.plot:
            # x-axis: the tail of the group's arrival times aligned with
            # each delta series, plus the new arrival.
            self.plot_1d_dbscan(td1_training, labels1,
                                list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta1[group_id])) :]+[arrival_time],
                                self.td1_fig_dbscan, "", "Pkt/Time", "Pkt Rate DBSCAN Clustering - Anomalous Pkts in Black")
            self.plot_1d_dbscan(td2_training, labels2,
                                list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta2[group_id])) :]+[arrival_time],
                                self.td2_fig_dbscan, "", "Pkt/Time^2")
            self.plot_1d_dbscan(td3_training, labels3,
                                list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta3[group_id])) :]+[arrival_time],
                                self.td3_fig_dbscan, "Time", "Pkt/Time^3")
        # One scaler per delta order: fit on history only, then transform
        # the new delta with the same scaling.
        scaler1 = preprocessing.StandardScaler()
        scaler2 = preprocessing.StandardScaler()
        scaler3 = preprocessing.StandardScaler()
        time_training1 = scaler1.fit_transform(np.array(self.time_delta1[group_id]).reshape(-1,1))
        time_features1 = scaler1.transform(np.array(td1).reshape(1,-1))
        time_training2 = scaler2.fit_transform(np.array(self.time_delta2[group_id]).reshape(-1,1))
        time_features2 = scaler2.transform(np.array(td2).reshape(1,-1))
        time_training3 = scaler3.fit_transform(np.array(self.time_delta3[group_id]).reshape(-1,1))
        time_features3 = scaler3.transform(np.array(td3).reshape(1,-1))
        # SVM predictions are computed for plotting only — not returned.
        time_classifier1 = OneClassSVM().fit(time_training1)
        time_prediction1 = time_classifier1.predict(time_features1)
        time_classifier2 = OneClassSVM().fit(time_training2)
        time_prediction2 = time_classifier2.predict(time_features2)
        time_classifier3 = OneClassSVM().fit(time_training3)
        time_prediction3 = time_classifier3.predict(time_features3)
        if self.plot:
            self.plot_1d_svm(self.time_delta1[group_id], time_classifier1,
                             list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta1[group_id])) :],
                             scaler1, self.td1_fig_svm, "", "Pkt/Time", "Pkt Rate One Class SVM Classification")
            self.plot_1d_svm(self.time_delta2[group_id], time_classifier2,
                             list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta2[group_id])) :],
                             scaler2, self.td2_fig_svm, "", "Pkt/Time^2")
            self.plot_1d_svm(self.time_delta3[group_id], time_classifier3,
                             list(self.time_data[group_id])[(len(self.time_data[group_id])-len(self.time_delta3[group_id])) :],
                             scaler3, self.td3_fig_svm, "Time", "Pkt/Time^3")
    except (KeyError, IndexError, ValueError) as e:
        # Unknown group or insufficient history: report "not anomalous".
        print e
        db_predict1, db_predict2, db_predict3 = 0,0,0
    return db_predict1, db_predict2, db_predict3