Python Pipeline.set_params Method Code Examples

This article collects typical usage examples of the Python method sklearn.pipeline.Pipeline.set_params. If you are wondering what Pipeline.set_params does, how to call it, or what real-world uses look like, the curated examples below should help. You can also browse further usage examples of the containing class, sklearn.pipeline.Pipeline.


The following presents 15 code examples of Pipeline.set_params, sorted by popularity by default.
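Before diving into the examples, here is a minimal, self-contained sketch of the method itself (the estimators and parameter values are illustrative, not drawn from the examples below): set_params addresses the parameters of a named pipeline step with the step__parameter naming convention, and returns the pipeline itself, so the call can be chained directly into fit.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=100, n_features=5, random_state=0)

pipe = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])

# '<step name>__<parameter>' reaches into the named step; set_params
# returns the pipeline, so it can be chained into fit().
pipe.set_params(clf__C=0.5, scale__with_mean=True).fit(X, y)
print(pipe.named_steps['clf'].C)  # 0.5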

Example 1: score_for_params

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def score_for_params(kw):
    '''
    | Get the score from a single set of parameters.
    | Takes keyword arguments, including whether or not the median filter is included.
    '''
    
    # Turn the tuple into a packed dictionary to get all parameters
    params = _get_params(logistic_regression__C=kw) 

    # Create the pipeline which consists of image
    # processing and a classifier
    # Note - can make this map to a dictionary of image processors instead of just median
    image_processors = [ ('hog', image_processing.HOG()) ]
    if params.pop('median'):
        image_processors.insert(0, ('median_smooth', image_processing.MedianSmooth()))
    else:
        params.pop('median_smooth__radius')

    classifier = ('logistic_regression', LogisticRegression())

    estimators = image_processors + [classifier]
    
    pipeline = Pipeline(estimators)

    # Set the parameters on the pipeline and fit on the training data
    pipeline.set_params(**params).fit(X_train, y_train)

    return pipeline.score(X_train, y_train), pipeline.score(X_test, y_test)
Developer: cavestruz, Project: StrongCNN, Lines: 33, Source: parallel_pipeline.py

Example 2: predictModelForModelSelection

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def predictModelForModelSelection(city, config, grid, grid_crimes, grid_column, freq_short, n_feat, crime_column,
                                  cell_id, train_win, tspan, sspan, estimator, param_grid, model_name):
    preparator = dataprep.DataPreparator(city, config, grid, grid_crimes,
                                         grid_column, freq_short, n_feat)
    # get the data into df_base format
    df_base = preparator.getTimeSeriesData(crime_column, cell_id, train_win, tspan, 1,
                                           sspan, 1, 'ModelSel_R_in', 'ModelSel_R_out')
    # prepare df_base to fit the formal requirements of tsfresh
    input_columns_lag = ['Crimes']
    y_col = 'Crimes'
    ts_data, y = preparator.prepareTimeSeriesData(df_base, input_columns_lag, y_col)
    # split data
    X_train, X_test, y_train, y_test = helper.splitData(y, n_train, n_test)

    pipeline = Pipeline([('lagCrime', lagExtractor.LagExtractor(n_lag=n_feat)),
                         ('normalizer', normalizer),
                         ('estimator', estimator)
                         ])
    pipeline.set_params(lagCrime__lagged_ts_container=preparator.ts_lag(df_base, 'Crimes'))
    rand_grid_search = model_selection.RandomizedSearchCV(pipeline, param_distributions=param_grid, n_iter=100,
                                                          cv=model_selection.TimeSeriesSplit(n_splits=3))
    rand_grid_search.fit(X_train, y_train)
    y_pred = rand_grid_search.predict(X_test)

    return reporting.getPredictionResults_cell_only_Model(cell_id, model_name, n_feat, n_train, n_test, y_test, y_pred), y_test, y_pred
Developer: teco-kit, Project: PredictivePolicing, Lines: 31, Source: AnaModelSelection.py

Example 3: main

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def main(datafile, threshold):
    filename = "out{}{}.hrc".format(os.sep, os.path.basename(datafile.name))

    if not os.path.isfile(filename):
        header = datafile.readline()
        collist = [i for i, toggle in enumerate(header.split(",")) if toggle != "0"]
        datafile.seek(0)
        data = pd.read_csv(datafile, usecols=collist).as_matrix()

        pipeline = Pipeline([("clf", Hierarchical())])
        pipeline.set_params(**{})
        pipeline.fit(data)

        clf = pipeline.get_params()["clf"]
        hierarchy = clf.hierarchy_

        with open(filename, "wb") as fh:
            fh.write(ET.tostring(hierarchy.to_xml()))
    else:
        with open(filename, "rb") as fh:
            hierarchy = Cluster.from_xml(ET.parse(fh).getroot())

    print(ET.tostring(hierarchy.to_xml()).decode("utf-8"))

    if threshold is not None:
        clusters = hierarchy.cut(threshold)
        print("\n".join(c.to_str(i) for i, c in enumerate(clusters)))
        dump_graph(clusters)
Developer: joshterrell805-historic, Project: CPE466-KDD, Lines: 30, Source: hierarchical.py

Example 4: DecompositionPreprocessor

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
class DecompositionPreprocessor(Preprocessor):
    def __init__(self, training_data, input_variables, decomp_method='pca', n_components=2):
        assert training_data.shape[1] == len(input_variables)
        self.input_variables = input_variables
        self.n_components = n_components
        self.variables = ['V%d' % (i + 1) for i in range(self.n_components)]

        impute = Imputer()
        nzv = VarianceThreshold()
        scale = StandardScaler()

        model_l = [('impute', impute), ('nzv', nzv), ('scale', scale)]

        if decomp_method == 'pca':
            pca = PCA()
            model_l.append(('pca', pca))
            self.model = Pipeline(model_l)
            self.model.set_params(pca__whiten=False, pca__n_components=n_components)
        elif decomp_method == 'mbspca':
            mbspca = MiniBatchSparsePCA()
            model_l.append(('mbspca', mbspca))
            self.model = Pipeline(model_l)
            self.model.set_params(mbspca__n_components=n_components, mbspca__verbose=True)

        self._train(training_data)

    def _train(self, training_data):
        self.model.fit(training_data)

    def __call__(self, data):
        return self.model.transform(data)
Developer: living1069, Project: CellProfiler-Analyst, Lines: 33, Source: decomp.py

Example 5: PlotPerfPercentFeatures

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def PlotPerfPercentFeatures(X, y, est=LinearSVC()):
    '''
    Plot the performance of a classifier (default: linear SVM, i.e. SVM-Anova)
    as the percentile of features selected by an F-test is varied.

    http://scikit-learn.org/stable/auto_examples/svm/plot_svm_anova.html#example-svm-plot-svm-anova-py
    '''
    transform = SelectPercentile(f_classif)

    clf = Pipeline([('anova', transform), ('est', est)])
    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 2, 3, 5, 7, 10, 13, 15, 20, 25, 33, 50, 65, 75, 90, 100)
    # percentiles = (1,5,10,25,50,75,90)

    for percentile in percentiles:
        # print(percentile)
        clf.set_params(anova__percentile=percentile)
        this_scores = cross_val_score(clf, X, y, cv=StratifiedShuffleSplit(y, n_iter=7, test_size=0.3), n_jobs=-1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())
    print("Outputting Graph:")

    plt.errorbar(percentiles, score_means, np.array(score_stds))

    plt.title('Predictor performance, varying the percent of features used')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction Performance')
    plt.axis('tight')
    plt.show()
Developer: MichaelDoron, Project: ProFET, Lines: 35, Source: OutPutRes.py
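Example 5 sweeps anova__percentile through a manual set_params loop (Examples 9 and 15 below follow the same pattern). For comparison, the same sweep can be written declaratively with GridSearchCV, which calls set_params internally for every candidate. A minimal sketch, assuming the modern sklearn.model_selection API rather than the older cross_validation interfaces used in the example above:

from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=20, random_state=0)

clf = Pipeline([('anova', SelectPercentile(f_classif)), ('est', LinearSVC())])
# GridSearchCV calls clf.set_params(anova__percentile=...) internally
# for each candidate value, replacing the manual loop.
search = GridSearchCV(clf, {'anova__percentile': [5, 10, 25, 50, 75, 90]}, cv=5)
search.fit(X, y)
print(search.best_params_, search.best_score_)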

Example 6: create_ngram_model

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_ngram_model(params=None):
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3),
                                   analyzer="word", binary=False)
    clf = MultinomialNB()
    # Results comparison:
    # 0.757   0.046   0.850   0.044   
    # == Pos vs. rest ==
    # 0.595   0.064   0.668   0.071   
    # == Neg vs. rest ==
    # 0.712   0.044   0.505   0.067   
    # time spent: 51.199045181274414


    # 0.771   0.048   0.869   0.053   
    # == Pos vs. rest ==
    # 0.614   0.044   0.692   0.048   
    # == Neg vs. rest ==
    # 0.748   0.030   0.504   0.069
    # params1 = {"alpha": 0.05, "class_prior": None, "fit_prior":True}
    # clf.set_params(**params1)
    pipeline = Pipeline([('vect', tfidf_ngrams), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
Developer: greatabel, Project: MachineLearning, Lines: 28, Source: i12tunning+parameter+of+classify.py

Example 7: classify

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def classify(data_trn, lbl_trn, data_vld, lbl_vld, data_tst, lbl_tst):

    data_trn = normalize(data_trn, copy=False)
    data_vld = normalize(data_vld, copy=False)
    data_tst = normalize(data_tst, copy=False)

    # accuracy metric
    metric_obj = mean_squared_error
    '''
    Train our model to predict labels for dataset #1
    '''
    parameters = {'svr__gamma': 1.5, 'svr__probability': False, 'svr__epsilon': 0.4, 'svr__C': 1, 'svr__kernel': 'rbf'}
    cls = Pipeline([
            #('feature_selection', LinearSVC()),
            ('svr', SVR())
            ])
    cls.set_params(**parameters)

    cls.fit(data_trn, lbl_trn)

    pred_vld = cls.predict(data_vld)
    pred_tst = cls.predict(data_tst)

    print("Score for vld: %.6f" % (metric_obj(lbl_vld, pred_vld),))
    print("Score for tst: %.6f" % (metric_obj(lbl_tst, pred_tst),))

    return pred_vld, pred_tst
Developer: brat000012001, Project: DUMLS14, Lines: 30, Source: pred_final_ds3.py

Example 8: baart_criteria

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
    def baart_criteria(self, X, y):
        """
        Returns the optimal Fourier series degree as determined by
        `Baart's Criteria <http://articles.adsabs.harvard.edu/cgi-bin/nph-iarticle_query?1986A%26A...170...59P&data_type=PDF_HIGH&whole_paper=YES&type=PRINTER&filetype=.pdf>`_ [JOP]_.

        **Citations**

        .. [JOP] J. O. Petersen, 1986,
                 "Studies of Cepheid type variability. IV.
                 The uncertainties of Fourier decomposition parameters.",
                 A&A, Vol. 170, p. 59-69
        """
        try:
            min_degree, max_degree = self.degree_range
        except ValueError:
            raise ValueError("Degree range must be a length two sequence")

        cutoff = self.baart_tolerance(X)
        pipeline = Pipeline([('Fourier', Fourier()),
                             ('Regressor', self.regressor)])
        sorted_X = numpy.sort(X, axis=0)
        X_sorting = numpy.argsort(rowvec(X))
        for degree in range(min_degree, max_degree):
            pipeline.set_params(Fourier__degree=degree)
            pipeline.fit(X, y)
            lc = pipeline.predict(sorted_X)
            residuals = y[X_sorting] - lc
            p_c = autocorrelation(residuals)
            if abs(p_c) <= cutoff:
                return degree
        # reached max_degree without reaching cutoff
        return max_degree
Developer: astroswego, Project: plotypus, Lines: 34, Source: preprocessing.py

Example 9: svm_anova

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def svm_anova(train_data, labels):
    ###############################################################################
    # Create a feature-selection transform and an instance of SVM that we
    # combine together to form a full-blown estimator

    transform = SelectPercentile(f_classif)

    clf = Pipeline([('anova', transform), ('svc', SVC(C=1.0))])

    ###############################################################################
    # Plot the cross-validation score as a function of percentile of features
    score_means = list()
    score_stds = list()
    percentiles = (1, 3, 6, 10, 15, 20, 30, 40, 60, 80, 100)

    for percentile in percentiles:
        clf.set_params(anova__percentile=percentile)
        # Compute the cross-validation score (n_jobs=1 here; set n_jobs=-1 to use all CPUs)
        this_scores = cross_val_score(clf, train_data, labels, n_jobs=1)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    plt.errorbar(percentiles, score_means, np.array(score_stds))

    plt.title('Performance of the SVM-Anova varying the percentile of features selected')
    plt.xlabel('Percentile')
    plt.ylabel('Prediction rate')

    plt.axis('tight')
    plt.show()
Developer: kirk86, Project: Task-1, Lines: 32, Source: misc.py

Example 10: create_union_model

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_union_model(params=None):
    def preprocessor(tweet):
        tweet = tweet.lower()

        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)

        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('tfidf', tfidf_ngrams)])
    #all_features = FeatureUnion([('ling', ling_stats)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
Developer: greatabel, Project: MachineLearning, Lines: 27, Source: i14combine+classify.py

Example 11: create_ngram_model

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def create_ngram_model(params=None):
    tfidf_ngrams = TfidfVectorizer(ngram_range=(1, 3), analyzer="word", binary=False)
    clf = MultinomialNB()
    pipeline = Pipeline([("vect", tfidf_ngrams), ("clf", clf)])
    if params:
        pipeline.set_params(**params)
    return pipeline
Developer: ciah0704, Project: building-ml-systems-with-python, Lines: 9, Source: sentiment_analysis_tweets_tuning.py

Example 12: create_model

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
    def create_model(self, params=None):
        all_features = self._unify_features()
        pipeline = Pipeline([('all', all_features), ('clf', self.classifier)])
        if params:
            pipeline.set_params(**params)

        return pipeline
Developer: sjmaharjan, Project: authorprofile15, Lines: 9, Source: train.py

Example 13: test_set_params_nested_pipeline

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def test_set_params_nested_pipeline():
    estimator = Pipeline([
        ('a', Pipeline([
            ('b', DummyRegressor())
        ]))
    ])
    estimator.set_params(a__b__alpha=0.001, a__b=Lasso())
    estimator.set_params(a__steps=[('b', LogisticRegression())], a__b__C=5)
Developer: lebigot, Project: scikit-learn, Lines: 10, Source: test_pipeline.py
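The test above relies on the order in which set_params applies its arguments: whole-step replacements (plain step names) are applied before the nested step__parameter values, regardless of keyword order. A minimal single-level sketch of the same behavior, assuming a scikit-learn version that supports replacing steps through set_params:

from sklearn.linear_model import Lasso, Ridge
from sklearn.pipeline import Pipeline

pipe = Pipeline([('reg', Ridge())])
# 'reg' is replaced by the new Lasso first, then 'reg__alpha' is set on it,
# even though the alpha keyword comes first in the call.
pipe.set_params(reg__alpha=0.001, reg=Lasso())
print(type(pipe.named_steps['reg']).__name__, pipe.named_steps['reg'].alpha)  # Lasso 0.001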

Example 14: test_pipeline_init

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def test_pipeline_init():
    # Test the various init parameters of the pipeline.
    assert_raises(TypeError, Pipeline)
    # Check that we can't instantiate pipelines with objects without fit
    # method
    pipe = assert_raises(TypeError, Pipeline, [('svc', IncorrectT)])
    # Smoke test with only an estimator
    clf = T()
    pipe = Pipeline([('svc', clf)])
    assert_equal(pipe.get_params(deep=True),
                 dict(svc__a=None, svc__b=None, svc=clf,
                     **pipe.get_params(deep=False)
                     ))

    # Check that params are set
    pipe.set_params(svc__a=0.1)
    assert_equal(clf.a, 0.1)
    assert_equal(clf.b, None)
    # Smoke test the repr:
    repr(pipe)

    # Test with two objects
    clf = SVC()
    filter1 = SelectKBest(f_classif)
    pipe = Pipeline([('anova', filter1), ('svc', clf)])

    # Check that we can't use the same stage name twice
    assert_raises(ValueError, Pipeline, [('svc', SVC()), ('svc', SVC())])

    # Check that params are set
    pipe.set_params(svc__C=0.1)
    assert_equal(clf.C, 0.1)
    # Smoke test the repr:
    repr(pipe)

    # Check that params are not set when naming them wrong
    assert_raises(ValueError, pipe.set_params, anova__C=0.1)

    # Test clone
    pipe2 = clone(pipe)
    assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])

    # Check that apart from estimators, the parameters are the same
    params = pipe.get_params(deep=True)
    params2 = pipe2.get_params(deep=True)
    
    for x in pipe.get_params(deep=False):
        params.pop(x)
    
    for x in pipe2.get_params(deep=False):
        params2.pop(x)
    
    # Remove estimators that were copied
    params.pop('svc')
    params.pop('anova')
    params2.pop('svc')
    params2.pop('anova')
    assert_equal(params, params2)
Developer: mackmakudo, Project: scikit-learn, Lines: 60, Source: test_pipeline.py

Example 15: featureSelection_cK

# Required import: from sklearn.pipeline import Pipeline [as alias]
# Or: from sklearn.pipeline.Pipeline import set_params [as alias]
def featureSelection_cK(title, clf, X, y, CV, n_jobs=1, kvalues=tuple(range(1, 43))):
    """
    Perform "choose K best" feature selection and return a plot of performance
    vs. the number of features.

    Parameters
    ----------
    clf : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    title : string
        Title for the chart.

    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    CV : integer, cross-validation generator, optional
        If an integer is passed, it is the number of folds (defaults to 3).
        Specific cross-validation objects can be passed; see the
        sklearn.cross_validation module for the list of possible objects.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).

    kvalues : sequence
        The numbers of features to test, one per iteration.
    """

    # Combine a feature-selection transform and a classifier to create a full-blown estimator
    transform = feature_selection.SelectKBest(feature_selection.f_classif)
    clf_k = Pipeline([('anova', transform), ('svc', clf)])

    # Plot the cross-validation score as a function of number of features
    score_means = list()
    score_stds = list()

    for k in kvalues:
        clf_k.set_params(anova__k=k)
        # Compute the cross-validation score, honoring the n_jobs argument
        this_scores = cross_validation.cross_val_score(clf_k, X, y, cv=CV, n_jobs=n_jobs)
        score_means.append(this_scores.mean())
        score_stds.append(this_scores.std())

    transform.fit(X, y)
    feat_scores = transform.scores_

    plt.errorbar(kvalues, score_means, np.array(score_stds))

    plt.title(title)
    plt.xlabel('Number of Features')
    plt.ylabel('Prediction rate')

    plt.axis('tight')
    return plt, score_means, feat_scores
Developer: kellystifter, Project: PulseClassificationForLUX, Lines: 60, Source: featureSelection.py


Note: The sklearn.pipeline.Pipeline.set_params examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many programmers, and copyright in the source code remains with the original authors; for distribution and use, please consult the corresponding project's license. Do not repost without permission.