Python pipeline.make_pipeline函数代码示例

本文整理汇总了Python中sklearn.pipeline.make_pipeline函数的典型用法代码示例。如果您正苦于以下问题：Python make_pipeline函数的具体用法？Python make_pipeline怎么用？Python make_pipeline使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了make_pipeline函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main(met_fname, gday_outfname, var):
    """Fit a k-nearest-neighbours emulator of one GDAY output and plot it.

    The met drivers are standardised and fed to a KNeighborsRegressor tuned
    with a 5-fold grid search on the first ``n_train`` rows.  The fitted
    emulator then predicts the whole record and the rows held back after
    the training window are plotted against the GDAY output.

    Parameters
    ----------
    met_fname
        Met forcing file; handed to ``remove_comments_from_header`` and the
        result is read with ``pandas.read_csv``.
    gday_outfname
        GDAY output file, read with ``pandas.read_csv``.
    var
        Name of the GDAY output column to emulate.
    """
    # Rows [0, n_train) are used for fitting; rows [n_train, n_plot_end) are
    # the held-back period shown in the plot.
    n_train = 4000
    n_plot_end = 4383

    # Load met data
    s = remove_comments_from_header(met_fname)
    df_met = pd.read_csv(s, parse_dates=[[0,1]], skiprows=4, index_col=0,
                         sep=",", keep_date_col=True,
                         date_parser=date_converter)

    # Need to build numpy array, so drop year, doy cols.  ``.iloc`` is the
    # positional indexer; the old ``.ix`` accessor no longer exists in pandas.
    met_data = df_met.iloc[:, 2:].values
    met_data_train = df_met.iloc[0:n_train, 2:].values

    # Load GDAY outputs
    df = pd.read_csv(gday_outfname, skiprows=3, sep=",", skipinitialspace=True)
    df['date'] = make_data_index(df)
    df = df.set_index('date')

    target = df[var][0:n_train].values

    # BUILD MODELS
    param_KNR = { "n_neighbors": [20], "weights": ['distance'] }
    regmod = KNeighborsRegressor()

    # Scale the drivers before the distance-based regressor.
    regmod_p = make_pipeline(StandardScaler(), regmod)

    # GridSearchCV addresses pipeline parameters as "<step name>__<param>".
    modlab = regmod_p.steps[-1][0]
    par_grid = {'{0}__{1}'.format(modlab, parkey): pardat
                for (parkey, pardat) in param_KNR.items()}

    emulator = GridSearchCV(regmod_p, param_grid=par_grid, cv=5)
    emulator.fit(met_data_train, target)
    predict = emulator.predict(met_data)

    df = pd.DataFrame({'DT': df.index, 'emu': predict, 'gday': df[var]})

    plt.plot_date(df.index[n_train:n_plot_end],
                  df['emu'][n_train:n_plot_end], 'o', label='Emulator')
    plt.plot_date(df.index[n_train:n_plot_end],
                  df['gday'][n_train:n_plot_end], 'o', label='GDAY')
    plt.ylabel('GPP (g C m$^{-2}$ s$^{-1}$)')
    plt.legend()
    plt.show()

开发者ID:mdekauwe，项目名称:gday_emulator，代码行数:60，代码来源:emulator.py

示例2: test_pipeline_ducktyping

def test_pipeline_ducktyping():
    # Each bare attribute access below is itself the check: it raises
    # AttributeError when the pipeline does not expose that method.

    # Mult on its own: predict, transform and inverse_transform are reachable.
    pipe = make_pipeline(Mult(5))
    pipe.predict
    pipe.transform
    pipe.inverse_transform

    # Transf on its own: no predict, but both transform directions.
    pipe = make_pipeline(Transf())
    assert not hasattr(pipe, 'predict')
    pipe.transform
    pipe.inverse_transform

    # A lone 'passthrough' step is named after itself.
    pipe = make_pipeline('passthrough')
    first_step = pipe.steps[0]
    assert first_step == ('passthrough', 'passthrough')
    assert not hasattr(pipe, 'predict')
    pipe.transform
    pipe.inverse_transform

    # NoInvTransf as the last step: inverse_transform is not exposed.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipe, 'predict')
    pipe.transform
    assert not hasattr(pipe, 'inverse_transform')

    # NoInvTransf as the first step: inverse_transform is not exposed either.
    pipe = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipe, 'predict')
    pipe.transform
    assert not hasattr(pipe, 'inverse_transform')

开发者ID:allefpablo，项目名称:scikit-learn，代码行数:26，代码来源:test_pipeline.py

示例3: preprocess

    def preprocess(self, any_set, is_train):
        """Turn the 'title' and 'description' columns into TF-IDF features.

        Parameters
        ----------
        any_set
            Data handed to the pipeline; ``ColumnSelector`` is asked for the
            keys 'title' and 'description'.
        is_train : bool
            When true, a new pipeline is built, stored on ``self.pipeline``
            and fitted on ``any_set``.  Otherwise the pipeline stored by an
            earlier training call is reused.

        Returns
        -------
        The output of ``fit_transform`` (training) or ``transform``
        (otherwise) of the union of the two per-column pipelines.
        """
        if not is_train:
            return self.pipeline.transform(any_set)

        # Candidate token patterns; only 'match_word1' is used below.
        # Every backslash is doubled so none of them forms an invalid string
        # escape (the former '\w' spelling triggers a warning on recent
        # Python versions).
        dico_pattern = {
            'match_lowercase_only': '\\b[a-z]+\\b',
            'match_word': '\\w{2,}',
            'match_word1': '(?u)\\b\\w+\\b',
            'match_word_punct': '\\w+|[,.?!;]',
            'match_NNP': '\\b[A-Z][a-z]+\\b|\\b[A-Z]+\\b',
            'match_punct': "[,.?!;'-]",
        }

        # Both text columns share one vectorizer configuration.
        tfidf_options = dict(
            lowercase=True, stop_words='english',
            token_pattern=dico_pattern["match_word1"],
            ngram_range=(1, 2), max_df=1.0, min_df=2, max_features=None,
            vocabulary=None, binary=True, norm=u'l2',
            use_idf=True, smooth_idf=True, sublinear_tf=True)

        tfv_title = TfidfVectorizer(**tfidf_options)
        tfv_desc = TfidfVectorizer(**tfidf_options)

        title_pipe = make_pipeline(ColumnSelector(key='title'), tfv_title)
        desc_pipe = make_pipeline(ColumnSelector(key='description'), tfv_desc)
        self.pipeline = make_union(title_pipe, desc_pipe)

        return self.pipeline.fit_transform(any_set)

开发者ID:Cadene，项目名称:DataScienceGame，代码行数:28，代码来源:Predictor.py

示例4: __init__

    def __init__(self, **config):
        """Build the feature pipeline and the classifier described by config.

        Every name in ``_configuration_options`` must be present in
        ``config``.  The keys read here are "sparse_features",
        "dense_features", "classifier" and "classifier_args".

        Raises
        ------
        ValueError
            If a required option is absent, or if ``config["classifier"]``
            is not a key of ``_valid_classifiers``.
        """
        # Report the first required option that was not supplied.
        absent = [name for name in _configuration_options
                  if name not in config]
        if absent:
            raise ValueError(
                "Missing configuration option {!r}".format(absent[0]))

        # Feature extraction: the sparse features go through a classifier
        # used as a feature, then get stacked next to the dense features.
        sparse_spec = parse_features(config["sparse_features"])
        sparse_branch = make_pipeline(
            Vectorizer(sparse_spec, sparse=True),
            ClassifierAsFeature(),
        )
        dense_spec = parse_features(config["dense_features"])
        dense_branch = Vectorizer(dense_spec, sparse=False)
        all_features = make_union(sparse_branch, dense_branch)

        # Classifier: look up the factory by name, then instantiate it.
        try:
            classifier_factory = _valid_classifiers[config["classifier"]]
        except KeyError:
            raise ValueError(
                "Unknown classification algorithm {!r}".format(
                    config["classifier"]))
        built_classifier = classifier_factory(**config["classifier_args"])

        self.pipeline = make_pipeline(all_features, StandardScaler())
        self.classifier = built_classifier

开发者ID:52nlp，项目名称:iepy，代码行数:25，代码来源:relation_extraction_classifier.py

示例5: get_pipeline

def get_pipeline(fsmethods, clfmethod):
    """Return a sklearn Pipeline chaining feature selection and an estimator.

    A list of feature-selection methods is joined in a FeatureUnion, which
    is then placed in a Pipeline in front of ``clfmethod``.  A single
    transformer is used as the first step directly.  An empty list means
    "no feature selection": the Pipeline then holds ``clfmethod`` alone.

    Parameters
    ----------
    fsmethods: list of estimators, or a single transformer
        All estimators in a pipeline, must be transformers (i.e. must have a transform method).

    clfmethod: classifier
        The last estimator may be any type (transformer, classifier, etc.).

    Returns
    -------
    pipe

    Raises
    ------
    ValueError
        If ``fsmethods`` is neither a list nor an object with a
        ``transform`` attribute.
    """
    if isinstance(fsmethods, list):
        # Skip the union for an empty list instead of building an empty one.
        feat_union = make_union(*fsmethods) if fsmethods else None
    elif hasattr(fsmethods, 'transform'):
        feat_union = fsmethods
    else:
        raise ValueError('fsmethods expected to be either a list or a transformer method')

    if feat_union is None:
        return make_pipeline(clfmethod)
    return make_pipeline(feat_union, clfmethod)

开发者ID:Neurita，项目名称:darwin，代码行数:32，代码来源:sklearn_utils.py

示例6: analysis

def analysis(name, typ, condition=None, query=None, title=None):
    """Wrapper to ensure that we attribute the same function for each type
    of analyses: e.g. categorical, regression, circular regression.

    Parameters
    ----------
    name
        Analysis name; also used as ``condition`` when none is given.
    typ : str
        One of 'categorize', 'regress' or 'circ_regress'.
    condition, query, title
        Stored unchanged in the returned dict (``condition`` defaults to
        ``name``).

    Returns
    -------
    dict
        Keys: name, condition, query, clf, scorer, chance, erf_function,
        cv, typ, title, single_trial.

    Raises
    ------
    ValueError
        If ``typ`` is not one of the supported analysis types.
    """
    # Define univariate analysis
    erf_function = None  # Default is fast_mannwhitneyu
    # /!\ for categorical analyses, the contrast is min(y) - max(y)
    # e.g. target_present==False - target_present==True

    if typ == 'categorize':
        # estimator is normalization + l2 Logistic Regression
        clf = make_pipeline(
            StandardScaler(),
            force_predict(LogisticRegression(class_weight='balanced'), axis=1))
        scorer = scorer_auc
        chance = .5
    elif typ == 'regress':
        # estimator is normalization + l2 Ridge
        clf = make_pipeline(StandardScaler(), Ridge())
        scorer = scorer_spearman
        chance = 0.
    elif typ == 'circ_regress':
        # estimator is normalization + PolarRegression wrapping an l2 Ridge
        clf = make_pipeline(StandardScaler(), PolarRegression(Ridge()))
        scorer = scorer_angle
        chance = 0.
        # The univariate analysis needs a different scorer
        erf_function = scorer_circlin
    else:
        # Fail here with a clear message rather than with an
        # UnboundLocalError on clf/scorer/chance further down.
        raise ValueError(
            "Unknown analysis type {!r}; expected 'categorize', 'regress' "
            "or 'circ_regress'".format(typ))
    if condition is None:
        condition = name
    return dict(name=name, condition=condition, query=query, clf=clf,
                scorer=scorer, chance=chance, erf_function=erf_function,
                cv=8, typ=typ, title=title, single_trial=True)

开发者ID:kingjr，项目名称:decoding_unconscious_maintenance，代码行数:32，代码来源:conditions.py

示例7: test_pipeline_ducktyping

def test_pipeline_ducktyping():
    # Each bare attribute access below is itself the check: it raises
    # AttributeError when the pipeline does not expose that method.

    # Mult on its own: predict, transform and inverse_transform are reachable.
    pipe = make_pipeline(Mult(5))
    pipe.predict
    pipe.transform
    pipe.inverse_transform

    # Transf on its own: no predict, but both transform directions.
    pipe = make_pipeline(Transf())
    assert_false(hasattr(pipe, 'predict'))
    pipe.transform
    pipe.inverse_transform

    # A lone None step: no predict, but both transform directions.
    pipe = make_pipeline(None)
    assert_false(hasattr(pipe, 'predict'))
    pipe.transform
    pipe.inverse_transform

    # NoInvTransf as the last step: inverse_transform is not exposed.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert_false(hasattr(pipe, 'predict'))
    pipe.transform
    assert_false(hasattr(pipe, 'inverse_transform'))

    # NoInvTransf as the first step: inverse_transform is not exposed either.
    pipe = make_pipeline(NoInvTransf(), Transf())
    assert_false(hasattr(pipe, 'predict'))
    pipe.transform
    assert_false(hasattr(pipe, 'inverse_transform'))

开发者ID:dsquareindia，项目名称:scikit-learn，代码行数:25，代码来源:test_pipeline.py

示例8: test_bagging_classifier_with_missing_inputs

def test_bagging_classifier_with_missing_inputs():
    # Check that BaggingClassifier can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        # -np.inf rather than np.NINF: the alias was removed in NumPy 2.0.
        [2, -np.inf, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    # With the ``replace`` transformer in front, fitting and every
    # prediction method must run without error.
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)
    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)

开发者ID:daniel-perry，项目名称:scikit-learn，代码行数:29，代码来源:test_bagging.py

示例9: cross_validation_LR

def cross_validation_LR(X,Y, n_folds, C_seq, K_seq, verbose = False):
    '''
        To classify Y using X, we first use a univariate filter to choose K
        dimensions in X, where the difference between different Ys are
        highest, then run a logistic regression classifier with
        regularization parameter C on the K dimensions.

        To quantify how well X can classify Y, without specifying training and
        testing partition, we do n_folds cross validation.
        In each fold, during training, we do an inner loop cross validation to
        select C and K that give the best classification accuracy from a given
        range; and then we use this to classify the held-out testing data.

        Inputs:
            X, [n, p], n trials of p dimensional data, used for classification
            Y, [n], class labels
            n_folds,integer,  split the data into n_folds for cross validation
            C_seq, a sequence of regularization parameters for logistic
                    regression classifiers, smaller values specify stronger
                    regularization.
                    e.g. C_seq = 10.0** np.arange(-3,1,1)
            K_seq, a sequence of integers,
                    e.g.  K_seq = (np.floor(np.arange(0.2,1,0.2)*p)).astype(int)
            verbose: boolean, if true, print the best C and K chosen
        Output:
            averaged classification accuracy of the n_folds
    '''
    # NOTE(review): the filter scores features with f_regression; the ANOVA
    # F-test for class labels is f_classif -- confirm which one is intended.
    cv0 = StratifiedKFold(Y,n_folds = n_folds)
    cv_acc = np.zeros(n_folds)
    for i in range(n_folds):
        ind_test = cv0.test_folds == i
        ind_train = cv0.test_folds != i
        tmpX_train = X[ind_train,:]
        tmpY_train = Y[ind_train]
        tmpX_test = X[ind_test,:]
        tmpY_test = Y[ind_test]

        # The inner folds depend only on the training labels, so they are
        # built once per outer fold rather than once per (C, K) pair.
        cv1 = StratifiedKFold(tmpY_train,n_folds = n_folds)

        # grid search
        tmp_cv_score = np.zeros([len(C_seq), len(K_seq)])
        for j, C in enumerate(C_seq):
            for k, K in enumerate(K_seq):
                anova_filter = SelectKBest(f_regression, k = K)
                clf = LogisticRegression(C = C, penalty = "l2")
                anova_clf = make_pipeline(anova_filter, clf)
                tmp_cv_score[j,k] = cross_val_score(anova_clf, tmpX_train,
                                  tmpY_train, scoring = "accuracy",  cv = cv1).mean()

        best_ind = np.argmax(tmp_cv_score.ravel())
        best_j, best_k = np.unravel_index(best_ind, tmp_cv_score.shape)

        # Refit with the winning pair; using the leftover loop indices j and
        # k here would always pick the last C and K of the grid.
        anova_filter = SelectKBest(f_regression, k = K_seq[best_k])
        clf = LogisticRegression(C = C_seq[best_j], penalty = "l2")
        anova_clf = make_pipeline(anova_filter, clf)
        tmpY_predict = anova_clf.fit(tmpX_train, tmpY_train).predict(tmpX_test)
        if verbose:
            # Same output as the old print statement, valid on Python 2 and 3.
            print("%s %s" % (C_seq[best_j], K_seq[best_k]))
        cv_acc[i] =  np.mean(tmpY_test == tmpY_predict)
    return np.mean(cv_acc)

开发者ID:YingYang，项目名称:misc_neuro_imaging_analysis_code，代码行数:59，代码来源:decoding.py

示例10: test_generator_ok

 def test_generator_ok(self):
     # Two generators built with identical settings, including the same
     # random_state, must yield the same 20 x 3 frame.
     def build_pipeline():
         generator = FakeGenerator(fakes=['job', 'name', 'address'],
                                   nb_sample=20, random_state=40)
         return make_pipeline(generator)

     first = build_pipeline().fit_transform(None)
     self.assertEqual(first.shape, (20, 3))

     second = build_pipeline().fit_transform(None)
     # Testing the seed
     assert_frame_equal(first, second)

开发者ID:romainx，项目名称:pyranha，代码行数:8，代码来源:test_io.py

示例11: fit

 def fit(self, X, y):
     """Fit one model per split, then a final model on their union.

     A LogisticRegressionCV pipeline is fitted on every (X_s, y_s) pair
     yielded by ``subject_splitter``.  The fitted pipelines are wrapped in
     ``FeatureUnionWrapper``, joined with ``make_union`` and followed by a
     final LogisticRegressionCV, stored as ``self.clf_``.  Returns ``self``.
     """
     # Filthy hack: the last column of X is split off and used as the ids
     # passed to subject_splitter; the remaining columns are the features.
     features, sids = X[:, :-1], X[:, -1]

     per_split_models = []
     for X_s, y_s in subject_splitter(features, y, sids):
         fitted = make_pipeline(LogisticRegressionCV()).fit(X_s, y_s)
         per_split_models.append(fitted)

     wrapped = [FeatureUnionWrapper(model) for model in per_split_models]
     f_union = make_union(*wrapped)
     final_model = make_pipeline(f_union, LogisticRegressionCV())
     self.clf_ = final_model.fit(features, y)
     return self

开发者ID:kastnerkyle，项目名称:kaggle-decmeg2014，代码行数:8，代码来源:minimal_clf.py

示例12: test_make_pipeline_memory

def test_make_pipeline_memory():
    # make_pipeline must hand the ``memory`` argument to the pipeline and
    # leave it as None when the argument is omitted.
    cachedir = mkdtemp()
    try:
        memory = Memory(cachedir=cachedir)
        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert_true(pipeline.memory is memory)
        pipeline = make_pipeline(DummyTransf(), SVC())
        assert_true(pipeline.memory is None)
    finally:
        # Remove the temporary cache directory even when an assertion fails.
        shutil.rmtree(cachedir)

开发者ID:lebigot，项目名称:scikit-learn，代码行数:9，代码来源:test_pipeline.py

示例13: __init__

 def __init__(self):
     """Create four lists of five unfitted regressors each.

     ``clf1``, ``clf2`` and ``clf3`` hold imputer + regressor pipelines;
     ``clf`` holds plain LinearRegression models.
     """
     n_models = 5  # number of independent estimators in every list

     def boosted():
         return make_pipeline(
             Imputer(),
             GradientBoostingRegressor(n_estimators=5000, max_depth=8))

     def extra_trees():
         forest = ExtraTreesRegressor(
             n_estimators=5000, criterion='mse', max_depth=8,
             min_samples_split=10, min_samples_leaf=1,
             min_weight_fraction_leaf=0.0,
             max_features='auto', max_leaf_nodes=None, bootstrap=False,
             oob_score=False,
             n_jobs=1, random_state=42, verbose=0, warm_start=True)
         return make_pipeline(Imputer(strategy='median'), forest)

     def linear_svr():
         return make_pipeline(Imputer(), svm.LinearSVR())

     self.clf1 = [boosted() for _ in range(n_models)]
     self.clf2 = [extra_trees() for _ in range(n_models)]
     self.clf3 = [linear_svr() for _ in range(n_models)]
     self.clf = [linear_model.LinearRegression() for _ in range(n_models)]

开发者ID:BenSchannes，项目名称:Epidemium，代码行数:13，代码来源:Regressor_blend_1.py

示例14: test_classes_property

def test_classes_property():
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    # Regressor as the last step: reading ``classes_`` raises
    # AttributeError even after fitting.
    regressor = make_pipeline(SelectKBest(k=1), LinearRegression())
    regressor.fit(features, labels)
    assert_raises(AttributeError, getattr, regressor, "classes_")

    # Classifier as the last step: ``classes_`` raises before fit and
    # equals the unique labels afterwards.
    classifier = make_pipeline(SelectKBest(k=1),
                               LogisticRegression(random_state=0))
    assert_raises(AttributeError, getattr, classifier, "classes_")
    classifier.fit(features, labels)
    expected_classes = np.unique(labels)
    assert_array_equal(classifier.classes_, expected_classes)

开发者ID:Givonaldo，项目名称:scikit-learn，代码行数:13，代码来源:test_pipeline.py

示例15: test_make_pipeline_memory

def test_make_pipeline_memory():
    # make_pipeline must hand the ``memory`` argument to the pipeline and
    # leave it as None when the argument is omitted.
    cachedir = mkdtemp()
    try:
        if LooseVersion(joblib_version) < LooseVersion('0.12'):
            # Deal with change of API in joblib
            memory = Memory(cachedir=cachedir, verbose=10)
        else:
            memory = Memory(location=cachedir, verbose=10)
        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert_true(pipeline.memory is memory)
        pipeline = make_pipeline(DummyTransf(), SVC())
        assert_true(pipeline.memory is None)
    finally:
        # Remove the temporary cache directory even when an assertion fails.
        shutil.rmtree(cachedir)

开发者ID:as133，项目名称:scikit-learn，代码行数:13，代码来源:test_pipeline.py

注：本文中的sklearn.pipeline.make_pipeline函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。