本文整理汇总了Python中sklearn.neighbors.kde.KernelDensity.fit方法的典型用法代码示例。如果您正苦于以下问题:Python KernelDensity.fit方法的具体用法?Python KernelDensity.fit怎么用?Python KernelDensity.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.neighbors.kde.KernelDensity
的用法示例。
在下文中一共展示了KernelDensity.fit方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: estimate_distribution
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def estimate_distribution(samples, h=0.1, n_points=100):
    """Estimate a 1-D probability density from *samples* via Gaussian KDE.

    Parameters
    ----------
    samples : 1-D array-like of observations.
    h : float, kernel bandwidth passed to KernelDensity.
    n_points : int, number of evaluation points on the fixed grid [-1, 1].

    Returns
    -------
    (xs, ys) : the evaluation grid and the estimated density at each point.
    """
    kde = KernelDensity(bandwidth=h)
    # fit() expects a 2-D array of shape (n_samples, n_features).
    kde.fit(samples[:, np.newaxis])
    xs = np.linspace(-1.0, 1.0, n_points)
    # score_samples() returns per-point log-densities in one vectorised call.
    # The original looped calling score([x]), which (a) passes 1-D input that
    # modern sklearn rejects and (b) returns the *total* log-likelihood, not
    # a per-sample one -- score_samples is the documented per-point API.
    ys = np.exp(kde.score_samples(xs[:, np.newaxis]))
    return xs, ys
示例2: OneClassKDE
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
class OneClassKDE(BaseClassifier):
_fit_params = ["bandwidth"]
_predict_params = []
def __init__(self, *args, **kwargs):
self.bandwidth = kwargs["bandwidth"]
self.perc_keep = kwargs["perc_keep"]
def fit(self, data, **kwargs):
#self.train_data = data
self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
idx = numpy.random.randint(2, size=len(data)).astype(numpy.bool)
print idx
self.kde.fit(data[idx, :])
self.training_score = self.kde.score_samples(data[~idx, :])
self.direct_thresh = numpy.percentile(self.training_score, 100-self.perc_keep)
print 'training', self.training_score.min(), self.training_score.mean(), self.training_score.max(), self.direct_thresh
print self.direct_thresh
def predict(self, data):
score = self.kde.score_samples(data)
self.score = score
res = (score < self.direct_thresh)
print 'test', self.score.min(), self.score.mean(), self.score.max()
print res.sum(), "of", len(self.score), 'outliers'
return res.astype(numpy.uint8)*-2+1
def decision_function(self, data=None):
return self.score
示例3: _importance_preprocess_uni
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def _importance_preprocess_uni(states, rewards, gradients, p_tar, p_gen):
    """Accumulate per-step importance-sampling quantities for each trajectory.

    Fits a KDE over all visited states, then for every trajectory builds
    cumulative log-probabilities, cumulative gradients and reward-to-go
    products, extending the shared result structure in place.
    """
    out = _create_episode_info()

    # Density model over every state of every trajectory.
    # TODO Pass in as args?
    all_states = [state for trajectory in states for state in trajectory]
    density = KernelDensity(kernel='gaussian', bandwidth=0.25)
    density.fit(all_states)

    for traj_s, traj_r, traj_g, tar_ps, gen_ps in izip(states, rewards,
                                                       gradients, p_tar, p_gen):
        log_dens = density.score_samples(traj_s)
        cum_p = np.cumsum(tar_ps)  # + np.mean(log_dens)
        cum_q = np.cumsum(gen_ps) + log_dens
        cum_grads = np.cumsum(traj_g, axis=0)
        # Reward-to-go: reversed cumulative sum of the reversed rewards.
        returns = np.cumsum(traj_r[::-1])[::-1]
        weighted_grads = (returns * cum_grads.T).T

        out.r_grads.extend(weighted_grads)
        out.traj_p_tar.extend(cum_p)
        out.traj_p_gen.extend(cum_q)
        out.traj_grads.extend(cum_grads)
        out.traj_r.extend(returns)

        # Per-action entries, used for estimating the Fisher information.
        out.act_grads.extend(traj_g)
        out.state_act_p_tar.extend(cum_p)
        out.state_act_p_gen.extend(cum_q)

    return out
示例4: createfeatmat
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def createfeatmat(N):
    """Build a (len(vals), N**2) matrix of KDE log-densities on an N x N grid.

    Each row fits a fresh 2-D Gaussian KDE on one entry of the module-level
    ``vals`` and evaluates it at every grid coordinate.
    """
    grid = getgridcoords(N).T
    n_rows = len(vals)
    featmat = np.zeros((n_rows, N ** 2))
    # Bandwidth scales with the grid spacing so smoothing matches resolution.
    bandwidth = 0.5 / (N - 1)
    for row in range(n_rows):
        points = np.array([vals[row][0], vals[row][1]]).T
        estimator = KernelDensity(bandwidth=bandwidth, kernel="gaussian")
        estimator.fit(points)
        featmat[row, :] = estimator.score_samples(grid)
    return featmat
示例5: kde_sklearn
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def kde_sklearn(x, x_grid, bandwidth=0.2, **kwargs):
    """Kernel density estimation with scikit-learn, renormalised over x_grid.

    Extra keyword arguments are forwarded to KernelDensity.
    """
    estimator = KernelDensity(bandwidth=bandwidth, **kwargs)
    estimator.fit(x[:, np.newaxis])
    # score_samples() yields log-likelihoods; exponentiate to densities.
    pdf = np.exp(estimator.score_samples(x_grid[:, np.newaxis]))
    # Divide by the trapezoidal integral so the curve integrates to one.
    return pdf / np.trapz(pdf, x_grid)
示例6: estimate_distribution
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def estimate_distribution(samples, h=0.1, n_points=100):
kde = KernelDensity(bandwidth=h)
min_xs = min(samples)
max_xs = max(samples)
samples = samples[:, np.newaxis]
kde.fit(samples)
xs = np.linspace(min_xs, max_xs, n_points)
ys = np.exp(kde.score_samples(xs[:, np.newaxis]))
print xs.shape, ys.shape, sum(ys)
return xs, ys
示例7: OneClassKDE
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
class OneClassKDE(BaseClassifier):
    """One-class novelty detector: fit a Gaussian KDE on the full training
    set and flag the lowest-density 10% of training scores as the outlier
    threshold."""
    _fit_params = ["bandwidth"]

    def __init__(self, *args, **kwargs):
        # Required keyword argument; raises KeyError if absent.
        self.bandwidth = kwargs["bandwidth"]

    def fit(self, data, **kwargs):
        """Fit the density model and derive the decision threshold."""
        self.kde = KernelDensity(kernel='gaussian', bandwidth=self.bandwidth)
        self.kde.fit(data)
        self.training_score = self.kde.score_samples(data)
        # 10th percentile of training log-densities -> outlier cut-off.
        self.direct_thresh = numpy.percentile(self.training_score, 10)

    def predict(self, data):
        """Return +1 for inliers and -1 for outliers."""
        self.score = self.kde.score_samples(data)
        # True (below threshold) maps to -1, False to +1.
        return (self.score < self.direct_thresh).astype(numpy.int32) * -2 + 1

    def decision_function(self, data):
        """Scores cached by the most recent predict() call."""
        return self.score
示例8: nmultitype_conf_matrix
# 需要导入模块: from sklearn.neighbors.kde import KernelDensity [as 别名]
# 或者: from sklearn.neighbors.kde.KernelDensity import fit [as 别名]
def nmultitype_conf_matrix(self,tipos,nfolds):
cadena = ""
for t in tipos:
cadena += t
if not os.path.exists("models/nmultitype_conf_matrix" + self.bd +"ts"+cadena+"Promedio"+str(nfolds)+".p") or True:
#Creamos la matriz de matrices donde guardaremos los resultados parciales
matrices = [None] * nfolds * nfolds
#Creamos/Recuperamos el modelo Node2Vec
n2v = node2vec(self.bd,self.port,self.user,self.pss,self.label,1000,20,6,self.mode,[],1)
n2v.learn("normal",0,False,0)
#Creamos los arrays X e Y, anadiendo
X = []
Y = []
#Creamos un array de comunes que son los nodos que son a la vez de ambos tipos
comunes = list()
for tipo in tipos:
for n in n2v.n_types[tipo]:
if n in n2v.w2v:
X.append(n2v.w2v[n])
if n in n2v.n_types[tipos[0]] and n in n2v.n_types[tipos[1]]:
comunes.append(n2v.w2v[n])
Y.append(tipo)
#Creamos los k folds estratificados
X = np.array(X)
Y = np.array(Y)
skf = StratifiedKFold(Y, n_folds=nfolds)
it = 0
kdes = []
for train_index, test_index in skf:
print "k-fold para kde"
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
Y_test = Y_test.astype('|S64')
#Creamos la funcion de densidad de probabilidad de cada tipo
for t in tipos:
print "Creando KDE para el tipo "+t
tempX = []
for idx,n in enumerate(Y_train):
if n == t:
tempX.append(X_train[idx])
#Calculating KDE with the train set
#use grid search cross-validation to optimize the bandwidth
#params = {'bandwidth': np.logspace(-1, 1, 10)}
#grid = GridSearchCV(neighbors.KernelDensity(), params)
#grid.fit(tempX)
#print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
# use the best estimator to compute the kernel density estimate
#kde = grid.best_estimator_
kde = KernelDensity(kernel='gaussian', bandwidth=0.1)
kde.fit(tempX)
kdes.append(kde)
print "Terminado KDE para el tipo "+t
#Dividimos el conjunto de test en tipo1, tipo2 y tipo1+2
cont = 0
for idx,x in enumerate(X_test):
total = 0
x = np.array(x)
if any((x == a).all() for a in comunes):
Y_test[idx] = str(tipos[0]+"+"+tipos[1])
cont += 1
print "Numero de elementos con doble tipo:"+str(cont)
#Creamos k-folds estratificados para el arbol de decision
skf = StratifiedKFold(Y_test, n_folds=nfolds)
for train_index, test_index in skf:
print "k-fold para decission tree"
X_train1, X_test1 = X_test[train_index], X_test[test_index]
Y_train1, Y_test1 = Y_test[train_index], Y_test[test_index]
clf = DecisionTreeClassifier(random_state=0)
print X_train1[0]
clf.fit(X_train1,Y_train1)
export_graphviz(clf);
Y_pred1 = clf.predict(X_test1)
matriz = metrics.confusion_matrix(Y_test1, Y_pred1,[tipos[0],tipos[1],tipos[0]+"+"+tipos[1]])
matrices[it] = np.array(matriz)
print matrices[it]
it += 1
f = open( "models/nmultitype_conf_matrix" + self.bd +"ts"+cadena+"Promedio"+str(nfolds)+".p", "w" )
pickle.dump(matrices,f)
else:
f = open( "models/nmultitype_conf_matrix" + self.bd +"ts"+cadena+"Promedio"+str(nfolds)+".p", "r" )
matrices = pickle.load(f)
total = matrices[0]
for m in matrices[1:]:
total += m
print total
matriz_promedio = total
matriz_promedio = matriz_promedio.astype('float')
#print matrices
#print matriz_promedio
matriz_promedio = matriz_promedio / len(matrices)
#print matriz_promedio
#calculando porcentajes a partir del promedio de frecuencias
for i in range(0,len(matriz_promedio)):
suma = 0
for j in range(0,len(matriz_promedio)):
suma += matriz_promedio[i][j]
matriz_promedio[i][j] = float(matriz_promedio[i][j])
for j in range(0,len(matriz_promedio)):
if suma > 0:
matriz_promedio[i][j] = round(float(matriz_promedio[i][j] * 100) / float(suma),2)
#.........这里部分代码省略.........