Python Pipeline.fit_predict方法代码示例

本文整理汇总了Python中sklearn.pipeline.Pipeline.fit_predict方法的典型用法代码示例。如果您正苦于以下问题：Python Pipeline.fit_predict方法的具体用法？Python Pipeline.fit_predict怎么用？Python Pipeline.fit_predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.Pipeline的用法示例。

在下文中一共展示了Pipeline.fit_predict方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_fit_predict_with_intermediate_fit_params

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
def test_fit_predict_with_intermediate_fit_params():
    """Check that ``fit_predict`` forwards step-prefixed fit params.

    The ``transf__``-prefixed parameter must reach the intermediate
    transformer, the ``clf__``-prefixed one must reach the final step,
    and the final step's parameter must not leak into the transformer.
    """
    steps = [('transf', TransfFitParams()), ('clf', FitParamT())]
    pipe = Pipeline(steps)

    fit_params = {
        'transf__should_get_this': True,
        'clf__should_succeed': True,
    }
    pipe.fit_predict(X=None, y=None, **fit_params)

    transf = pipe.named_steps['transf']
    clf = pipe.named_steps['clf']
    assert_true(transf.fit_params['should_get_this'])
    assert_true(clf.successful)
    assert_false('should_succeed' in transf.fit_params)

开发者ID:lebigot，项目名称:scikit-learn，代码行数:13，代码来源:test_pipeline.py

示例2: bestClassify

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
def bestClassify(X, Y, tfidf=True):
	"""Cluster the documents in X into two k-means groups and score them against Y.

	Each cluster is assigned the gold label that occurs most often among its
	members. The relabelled predictions are then compared with Y, and the
	label list, a confusion matrix, homogeneity, completeness, V-measure and
	adjusted Rand index are printed. Nothing is returned.

	Parameters
	----------
	X : documents fed to the vectorizer. Presumably already tokenized, since
		`identity` is used as both preprocessor and tokenizer -- confirm
		against the caller.
	Y : gold labels, aligned with X.
	tfidf : use TfidfVectorizer (with sublinear tf) when true, CountVectorizer
		otherwise. Defaults to True, the value that used to be hard-coded.
	"""
	if tfidf:
		vec = TfidfVectorizer(preprocessor=identity, tokenizer=identity,
							sublinear_tf=True)
	else:
		vec = CountVectorizer(preprocessor=identity, tokenizer=identity)

	km = KMeans(n_clusters=2, n_init=100, verbose=1)
	clusterer = Pipeline([('vec', vec), ('cls', km)])

	prediction = clusterer.fit_predict(X, Y)

	# Group the gold labels by the cluster id each sample was assigned to.
	checker = defaultdict(list)
	for pred, truth in zip(prediction, Y):
		checker[pred].append(truth)

	# Map every cluster id to the majority gold label inside that cluster.
	labeldict = {}
	for pred, truths in checker.items():
		labeldict[pred] = Counter(truths).most_common(1)[0][0]

	prediction = [labeldict[p] for p in prediction]

	# Both clusters can end up with the same majority label; de-duplicate
	# (keeping first-seen order) so confusion_matrix never receives a
	# repeated entry in `labels`.
	labels = []
	for majority in labeldict.values():
		if majority not in labels:
			labels.append(majority)
	print(labels)
	print(confusion_matrix(Y, prediction, labels=labels))

	print("Homogeneity:", homogeneity_score(Y, prediction))
	print("Completeness:", completeness_score(Y, prediction))
	print("V-measure:", v_measure_score(Y, prediction))
	print("Rand-Index:", adjusted_rand_score(Y, prediction))

开发者ID:Martbov，项目名称:LearningFromData，代码行数:37，代码来源:LFDassignment5_KMBinary_Mart.py

示例3: test_pipeline

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
def test_pipeline():
    """Smoke-test a featurizer + GaussianHMM pipeline on cached trajectories.

    The test only checks that ``fit_predict`` and the HMM's ``summarize``
    run without raising; no values are asserted.
    """
    trajectories = AlanineDipeptide().get_cached().trajectories
    first_traj = trajectories[0]
    backbone = first_traj.topology.select('backbone')

    steps = [
        # The first frame of the first trajectory is the featurizer's
        # second argument.
        ('diheds', SuperposeFeaturizer(backbone, first_traj[0])),
        ('hmm', GaussianHMM(n_states=4)),
    ]
    pipeline = Pipeline(steps)

    # The predicted sequence itself is not inspected.
    pipeline.fit_predict(trajectories)
    pipeline.named_steps['hmm'].summarize()

开发者ID:Eigenstate，项目名称:msmbuilder，代码行数:14，代码来源:test_ghmm.py

示例4: test_fit_predict_on_pipeline

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
def test_fit_predict_on_pipeline():
    """Check that ``Pipeline.fit_predict`` matches the two-step computation.

    Scaling and clustering the iris data by hand must give the same cluster
    assignments as running both steps through a pipeline's ``fit_predict``.
    """
    iris = load_iris()

    # Reference result: transform and cluster as two separate steps.
    scaler = StandardScaler()
    km = KMeans(random_state=0)
    scaled = scaler.fit_transform(iris.data)
    separate_pred = km.fit_predict(scaled)

    # Pipeline does not clone its estimators on construction, so it gets
    # fresh instances; otherwise fitting the pipeline would refit the very
    # objects that produced the reference result.
    pipe = Pipeline([
        ('scaler', StandardScaler()),
        ('Kmeans', KMeans(random_state=0)),
    ])
    pipeline_pred = pipe.fit_predict(iris.data)

    assert_array_almost_equal(pipeline_pred, separate_pred)

开发者ID:Givonaldo，项目名称:scikit-learn，代码行数:19，代码来源:test_pipeline.py

示例5: train

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
	def train(self,argv):
		"""Cluster the corpus named on the command line and print cluster scores.

		``argv`` must hold exactly two entries, the second being the training
		file; otherwise the process exits with a usage message. The corpus is
		vectorized with TF-IDF, clustered into two k-means groups, and each
		cluster is relabelled with its most common gold label before the
		metrics and confusion matrix are printed. Nothing is returned.
		"""
		testmode = False  # unused here; original note: separate test file or cross validation

		if len(argv) != 2:
			exit("Use kmeansBinary.py <trainfile>")
		trainfile = argv[1]

		# read_corpus yields X (tokenized documents) and Y (their labels).
		# use_sentiment selects polarity labels (True) or categories (False).
		X, Y = self.read_corpus(trainfile, use_sentiment=True)

		# The texts are already preprocessed and tokenized, so a dummy
		# function is used for both the preprocessor and the tokenizer.
		# (CountVectorizer and DictVectorizer were earlier alternatives.)
		vec = TfidfVectorizer(
			preprocessor=self.identity,
			tokenizer=self.identity,
			sublinear_tf=True,
		)
		kmeans = cluster.KMeans(n_clusters=2, n_init=10, verbose=1)
		km = Pipeline([('vec', vec), ('cls', kmeans)])

		labels_true = Y
		labels_pred = km.fit_predict(X, Y)

		# Collect the gold labels that landed in each predicted cluster.
		c = defaultdict(list)
		for pred, true in zip(labels_pred, labels_true):
			c[pred].append(true)

		# Name every cluster after its most frequent gold label.
		label = {}
		for key, members in c.items():
			count = Counter(members)
			label[key] = count.most_common(1)[0][0]
			print(key, count.most_common(6))

		labels_pred = [label[l] for l in labels_pred]
		labels = list(set(label.values()))
		print(labels)

		print(vec.get_feature_names())
		print("Homogeneity: %0.3f" % homogeneity_score(labels_true, labels_pred))
		print("Completeness: %0.3f" % completeness_score(labels_true, labels_pred))
		print("V-measure: %0.3f" % v_measure_score(labels_true, labels_pred))
		print("Adjusted Rand-Index: %.3f" % adjusted_rand_score(labels_true, labels_pred))
		print(confusion_matrix(labels_true, labels_pred, labels=labels))

开发者ID:chrispool，项目名称:lfd，代码行数:48，代码来源:kmeansBinary_chrispool.py

示例6: test_fit_predict_on_pipeline

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
def test_fit_predict_on_pipeline():
    """Check that ``Pipeline.fit_predict`` matches the two-step computation.

    Cluster assignments from a scaler + KMeans pipeline must equal those
    obtained by scaling and clustering the iris data by hand.
    """
    data = load_iris().data

    # Reference result: scale, then cluster, as two explicit steps.
    reference_scaler = StandardScaler()
    reference_km = KMeans(random_state=0)
    separate_pred = reference_km.fit_predict(
        reference_scaler.fit_transform(data))

    # Pipeline doesn't clone estimators on construction, so it is handed
    # its own, separately constructed instances.
    steps = [
        ("scaler", StandardScaler()),
        ("Kmeans", KMeans(random_state=0)),
    ]
    pipeline_pred = Pipeline(steps).fit_predict(data)

    assert_array_almost_equal(pipeline_pred, separate_pred)

开发者ID:cheral，项目名称:scikit-learn，代码行数:23，代码来源:test_pipeline.py

示例7: clustering_captcha

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 用法: fit_predict 是 Pipeline 的实例方法，需先构造 Pipeline 对象再调用（无法通过 import 单独导入）
	def clustering_captcha(self, image_path, check=False):
		"""Cluster a captcha image's pixels to separate its individual characters.

		The de-noised image's pixels are standardized and grouped by k-means
		into ``self.n_chars + 2`` clusters. The cluster with the largest pixel
		count is treated as background and dropped.

		Parameters
		----------
		image_path: str
			Absolute path of a single captcha image.
		check: bool
			When true, save each remaining cluster as an image together with
			a plot of its per-column pixel counts, for visual inspection of
			the clustering. Files go to ``<self.checking_path>/clusters``.

		Returns
		-------
		(image_vectors, col_npixs): tuple [2]
			image_vectors: array, shape (self.n_chars + 1, self.width * self.height)
				One flattened 0/1 pixel mask per non-background cluster.
			col_npixs: array, shape (self.n_chars + 1, self.width)
				For each mask, the number of set pixels in every image column.
		"""
		image = self.de_noise(image_path)
		# np.float was removed in NumPy 1.24; np.float64 is the dtype it
		# aliased, so the result is unchanged on older versions.
		image_pixs = np.array(image.getdata()).astype(np.float64)

		n_total = self.n_chars + 2
		sc = StandardScaler()
		km = KMeans(n_clusters=n_total)
		clu = Pipeline(steps=[('sc', sc), ('km', km)])
		clusters = clu.fit_predict(image_pixs)

		image_vectors = np.zeros((n_total, self.width*self.height))
		col_npixs = np.zeros((n_total, self.width))

		for i in np.unique(clusters):
			# Binary mask of the pixels assigned to cluster i.
			image_vectors[i, clusters == i] = 1
			image_vectors[i, :] = self.de_line(image_vectors[i, :])
			# Reshape to (height, width) and sum down the rows to get the
			# per-column pixel count.
			col_npixs[i, :] = image_vectors[i, :].reshape((
				self.height, self.width)).sum(axis=0)

		# The cluster holding the most pixels is taken to be the background.
		cluster_bkg = np.argmax(col_npixs.sum(axis=1))
		image_vectors = np.delete(image_vectors, cluster_bkg, axis=0)
		col_npixs = np.delete(col_npixs, cluster_bkg, axis=0)

		if check:
			# Default the checking directory to a sub-folder of the
			# training images when none has been configured.
			if not self.checking_path:
				self.checking_path = os.path.join(self.training_images_path, 'checking')

			if not os.path.isdir(self.checking_path):
				os.mkdir(self.checking_path)

			clusters_path = os.path.join(self.checking_path, 'clusters')
			if not os.path.isdir(clusters_path):
				os.mkdir(clusters_path)

			n_clusters = col_npixs.shape[0]
			# File name up to its first dot.
			img_name = os.path.split(image_path)[1].split('.')[0]
			for i in range(n_clusters):
				new_img_name = os.path.join(clusters_path,
					img_name + '_cluster' + str(i) + '_img' + '.jpg')
				new_fig_name = os.path.join(clusters_path,
					img_name + '_cluster' + str(i) + '_fig' + '.jpg')

				im_new = Image.new('1', (self.width, self.height))
				im_new.putdata(image_vectors[i, :])
				im_new.save(new_img_name)
				plt.plot(col_npixs[i, :])
				plt.savefig(new_fig_name)
				# Close the figure so plots do not accumulate across clusters.
				plt.close('all')

		return (image_vectors, col_npixs)

开发者ID:xcctbys，项目名称:Captchacrack，代码行数:69，代码来源:ShanghaiCaptchaSolver.py

注：本文中的sklearn.pipeline.Pipeline.fit_predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。