本文整理汇总了Python中sklearn.pipeline.Pipeline.fit_predict方法的典型用法代码示例。如果您正苦于以下问题：Python Pipeline.fit_predict方法的具体用法？Python Pipeline.fit_predict怎么用？Python Pipeline.fit_predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.Pipeline的用法示例。
在下文中一共展示了Pipeline.fit_predict方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_fit_predict_with_intermediate_fit_params
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def test_fit_predict_with_intermediate_fit_params():
    """Check that ``Pipeline.fit_predict`` forwards fit params to each step.

    Keyword arguments prefixed with ``transf__`` are expected to show up in
    the intermediate transformer's ``fit_params``; the ``clf__`` argument is
    expected to reach the final estimator and not the transformer.
    """
    steps = [('transf', TransfFitParams()), ('clf', FitParamT())]
    pipe = Pipeline(steps)

    routed_params = {
        'transf__should_get_this': True,
        'clf__should_succeed': True,
    }
    pipe.fit_predict(X=None, y=None, **routed_params)

    transformer = pipe.named_steps['transf']
    final_estimator = pipe.named_steps['clf']
    assert_true(transformer.fit_params['should_get_this'])
    assert_true(final_estimator.successful)
    # The classifier-only parameter must not leak into the transformer.
    assert_false('should_succeed' in transformer.fit_params)
示例2: bestClassify
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def bestClassify(X, Y, tfidf=True):
    """Cluster documents into two groups with k-means and print scores.

    The documents are vectorized, clustered by ``KMeans(n_clusters=2)``
    inside a ``Pipeline``, and each cluster is then named after the most
    frequent true label among its members so the result can be compared
    with ``Y``.

    Parameters
    ----------
    X : iterable
        Documents to cluster. ``identity`` is used as both preprocessor and
        tokenizer, so the documents are presumably already tokenized --
        verify against the caller.
    Y : sequence
        True label of every document, in the same order as ``X``.
    tfidf : bool, default True
        Use ``TfidfVectorizer`` (with sublinear tf) when true, otherwise
        ``CountVectorizer``.

    Returns
    -------
    None
        The label list, confusion matrix and clustering scores are printed.
    """
    if tfidf:
        vec = TfidfVectorizer(preprocessor=identity, tokenizer=identity,
                              sublinear_tf=True)
    else:
        vec = CountVectorizer(preprocessor=identity, tokenizer=identity)

    km = KMeans(n_clusters=2, n_init=100, verbose=1)
    clusterer = Pipeline([('vec', vec), ('cls', km)])
    prediction = clusterer.fit_predict(X, Y)

    # Collect the true labels that ended up in each cluster.
    checker = defaultdict(list)
    for pred, truth in zip(prediction, Y):
        checker[pred].append(truth)

    # Name every cluster after its most frequent true label.
    labeldict = {
        pred: Counter(truths).most_common(1)[0][0]
        for pred, truths in checker.items()
    }
    prediction = [labeldict[p] for p in prediction]

    # Both clusters may share the same majority label; keep each label once
    # (in first-seen order) so the confusion matrix has no repeated entries.
    labels = list(dict.fromkeys(labeldict.values()))
    print(labels)
    print(confusion_matrix(Y, prediction, labels=labels))
    print("Homogeneity:", homogeneity_score(Y, prediction))
    print("Completeness:", completeness_score(Y, prediction))
    print("V-measure:", v_measure_score(Y, prediction))
    # NOTE: the value printed here is the *adjusted* Rand index.
    print("Rand-Index:", adjusted_rand_score(Y, prediction))
示例3: test_pipeline
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def test_pipeline():
    """Smoke-test a featurizer + ``GaussianHMM`` pipeline via ``fit_predict``.

    Builds the pipeline on the cached alanine dipeptide trajectories, runs
    ``fit_predict`` and calls ``summarize()`` on the fitted HMM step.
    """
    dataset = AlanineDipeptide().get_cached()
    trajs = dataset.trajectories
    first_traj = trajs[0]
    backbone = first_traj.topology.select('backbone')

    steps = [
        # The first frame of the first trajectory is passed as the second
        # featurizer argument.
        ('diheds', SuperposeFeaturizer(backbone, first_traj[0])),
        ('hmm', GaussianHMM(n_states=4)),
    ]
    p = Pipeline(steps)

    assignments = p.fit_predict(trajs)
    p.named_steps['hmm'].summarize()
示例4: test_fit_predict_on_pipeline
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def test_fit_predict_on_pipeline():
    """Check ``Pipeline.fit_predict`` against running the steps by hand.

    The iris data is scaled and clustered once with stand-alone estimators
    and once through a pipeline; both label arrays must agree.
    """
    iris = load_iris()

    # Stand-alone estimators used for the reference result.
    scaler = StandardScaler()
    km = KMeans(random_state=0)

    # Pipeline does not clone its estimators on construction, so it gets
    # its own instances; otherwise the pipeline would re-fit the very
    # objects that produced the reference result.
    scaler_for_pipeline = StandardScaler()
    km_for_pipeline = KMeans(random_state=0)

    # first compute the transform and clustering step separately
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # use a pipeline to do the transform and clustering in one step
    pipe = Pipeline([('scaler', scaler_for_pipeline),
                     ('Kmeans', km_for_pipeline)])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
示例5: train
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def train(self, argv):
    """Cluster a training corpus with k-means and print evaluation metrics.

    The corpus is read with ``self.read_corpus``, vectorized with TF-IDF,
    clustered into two groups, and every cluster is named after its most
    frequent true label before the scores are printed.

    Parameters
    ----------
    argv : sequence of str
        Command-line style arguments; must contain exactly two items, the
        second being the path of the training file.

    Raises
    ------
    SystemExit
        With a usage message when ``argv`` does not have exactly two items.
    """
    import sys

    if len(argv) != 2:
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter.
        sys.exit("Use kmeansBinary.py <trainfile>")
    trainfile = argv[1]

    # X is the list of tokenized documents, Y the list of their labels.
    # use_sentiment selects the polarity (True) or the categories (False).
    X, Y = self.read_corpus(trainfile, use_sentiment=True)

    # we use a dummy function as tokenizer and preprocessor,
    # since the texts are already preprocessed and tokenized.
    vec = TfidfVectorizer(preprocessor=self.identity,
                          tokenizer=self.identity,
                          sublinear_tf=True)
    km = Pipeline([
        ('vec', vec),
        ('cls', cluster.KMeans(n_clusters=2, n_init=10, verbose=1)),
    ])

    labels_pred = km.fit_predict(X, Y)
    labels_true = Y

    # Group the true labels by the cluster they were assigned to.
    c = defaultdict(list)
    for pred, true in zip(labels_pred, labels_true):
        c[pred].append(true)

    # Name each cluster after its most frequent true label.
    label = {}
    for key, members in c.items():
        count = Counter(members)
        label[key] = count.most_common(1)[0][0]
        print(key, count.most_common(6))

    labels_pred = [label[l] for l in labels_pred]
    labels = list(set(label.values()))
    print(labels)

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and fall back for older releases.
    if hasattr(vec, "get_feature_names_out"):
        feature_names = list(vec.get_feature_names_out())
    else:
        feature_names = vec.get_feature_names()
    print(feature_names)

    print("Homogeneity: %0.3f" % homogeneity_score(labels_true, labels_pred))
    print("Completeness: %0.3f" % completeness_score(labels_true, labels_pred))
    print("V-measure: %0.3f" % v_measure_score(labels_true, labels_pred))
    print("Adjusted Rand-Index: %.3f" % adjusted_rand_score(labels_true, labels_pred))
    print(confusion_matrix(labels_true, labels_pred, labels=labels))
示例6: test_fit_predict_on_pipeline
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def test_fit_predict_on_pipeline():
    """Verify that ``Pipeline.fit_predict`` matches the step-by-step result.

    Scaling followed by k-means clustering is run once with stand-alone
    estimators and once through a pipeline; the predicted labels must be
    (almost) equal.
    """
    data = load_iris().data

    # Reference result: transform and cluster as two explicit steps.
    reference_scaler = StandardScaler()
    reference_km = KMeans(random_state=0)
    scaled_data = reference_scaler.fit_transform(data)
    separate_pred = reference_km.fit_predict(scaled_data)

    # Pipeline does not clone estimators on construction, hence the fresh
    # instances that are not shared with the reference computation.
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("Kmeans", KMeans(random_state=0)),
    ])
    pipeline_pred = pipe.fit_predict(data)

    assert_array_almost_equal(pipeline_pred, separate_pred)
示例7: clustering_captcha
# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit_predict [as 别名]
def clustering_captcha(self, image_path, check=False):
    """Cluster the pixels of a captcha image to separate its characters.

    The de-noised image is clustered into ``self.n_chars + 2`` groups with
    standardization followed by k-means. The cluster with the largest total
    pixel count is treated as background and dropped.

    Parameters
    ----------
    image_path : str
        Absolute path of a single captcha image.
    check : bool, default False
        When true, save one image and one per-column pixel-count plot for
        every remaining cluster under ``<checking_path>/clusters`` so the
        clustering quality can be inspected.

    Returns
    -------
    (image_vectors, col_npixs) : tuple of length 2
        image_vectors : ndarray, shape (self.n_chars + 1, self.width * self.height)
            One flattened pixel row per non-background cluster, after
            ``self.de_line`` has been applied to it.
        col_npixs : ndarray, shape (self.n_chars + 1, self.width)
            For every row of ``image_vectors``, the per-column sum of the
            pixels when the row is viewed as a (height, width) image.
    """
    image = self.de_noise(image_path)
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # type it used to alias.
    image_pixs = np.array(image.getdata()).astype(float)

    sc = StandardScaler()
    km = KMeans(n_clusters=(self.n_chars + 2))
    clu = Pipeline(steps=[('sc', sc), ('km', km)])
    clusters = clu.fit_predict(image_pixs)

    image_vectors = np.zeros((self.n_chars + 2, self.width * self.height))
    col_npixs = np.zeros((self.n_chars + 2, self.width))
    for i in np.unique(clusters):
        # Mark the pixels belonging to cluster i, then post-process the row.
        image_vectors[i, clusters == i] = 1
        image_vectors[i, :] = self.de_line(image_vectors[i, :])
        col_npixs[i, :] = image_vectors[i, :].reshape(
            (self.height, self.width)).sum(axis=0)

    # The cluster holding the most pixels is taken to be the background.
    cluster_bkg = np.argmax(col_npixs.sum(axis=1))
    image_vectors = np.delete(image_vectors, cluster_bkg, axis=0)
    col_npixs = np.delete(col_npixs, cluster_bkg, axis=0)

    if check:
        if not self.checking_path:
            self.checking_path = os.path.join(self.training_images_path,
                                              'checking')
        clusters_path = os.path.join(self.checking_path, 'clusters')
        # Creates the checking directory as well when it is missing and
        # tolerates directories that already exist.
        os.makedirs(clusters_path, exist_ok=True)

        n_clusters = col_npixs.shape[0]
        img_name = os.path.split(image_path)[1].split('.')[0]
        for i in range(n_clusters):
            prefix = img_name + '_cluster' + str(i)
            new_img_name = os.path.join(clusters_path, prefix + '_img' + '.jpg')
            new_fig_name = os.path.join(clusters_path, prefix + '_fig' + '.jpg')

            im_new = Image.new('1', (self.width, self.height))
            im_new.putdata(image_vectors[i, :])
            im_new.save(new_img_name)

            plt.plot(col_npixs[i, :])
            plt.savefig(new_fig_name)
            # Close the figure so the next cluster starts from a clean plot.
            plt.close('all')

    return (image_vectors, col_npixs)