当前位置: 首页>>代码示例>>Python>>正文


Python Pipeline.fit方法代码示例

本文整理汇总了Python中sklearn.pipeline.Pipeline.fit方法的典型用法代码示例。如果您正苦于以下问题：Python Pipeline.fit方法的具体用法？Python Pipeline.fit怎么用？Python Pipeline.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.pipeline.Pipeline的用法示例。


在下文中一共展示了Pipeline.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: calcCSPLDA

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
    def calcCSPLDA(epochs_train, labels_train, nb):
        """Create the CSP+LDA pipeline and fit it on the training data.

        (just really a function to call the MNE and SKlearn processing functs)

        Parameters
        ----------
        epochs_train : epochs in mne data format
            Only ``get_data()`` is called on it here.

        labels_train : labels of epochs in mne format

        nb : number of CSP components, must be even. (6 implies the 3
            top-most and bottom eigenvectors)

        Returns
        -------
        clf : the fitted model for the CSP+LDA approach

        csp.filters_ : CSP weight vector, shape (nchannels, nchannels)

        svc.coef_ : LDA weight vector, shape (1, nb)

        Examples
        --------
        >>> clf, csp_filters, lda_weights = calcCSPLDA(epochs_train, labels_train, 6)
        """
        # NOTE: despite its name, ``svc`` is an LDA classifier, not an SVC.
        svc = LDA()
        # Use the requested number of components. It used to be hard-coded to
        # 4, which silently ignored ``nb`` and contradicted the documented
        # shape of the returned LDA weights.
        csp = CSP(n_components=nb, reg=None, log=True, cov_est='epoch')
        clf = Pipeline([('CSP', csp), ('SVC', svc)])

        epochs_data = epochs_train.get_data()

        clf.fit(epochs_data, labels_train)

        return clf, csp.filters_, svc.coef_
开发者ID:renangohe,项目名称:bci_training_platform,代码行数:36,代码来源:DataProcessing.py

示例2: KFOLDTEST

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
    def KFOLDTEST(self, text, sent):
        """Run a 6-fold evaluation of a bag-of-ngrams LinearSVC pipeline.

        ``text`` and ``sent`` are indexed with the index arrays produced by
        ``KFold``. The same pipeline object is re-fitted on every fold, the
        per-fold scores are printed, and their mean is returned.
        """
        steps = [
            ("vectorizer", CountVectorizer(ngram_range=(1, 2), tokenizer=self.tokenize_data)),
            ("tfidf", TfidfTransformer(norm="l2", smooth_idf=False, use_idf=False)),
            ("classifier", OneVsOneClassifier(LinearSVC())),
        ]
        folds = KFold(n=len(text), n_folds=6)
        pipeline = Pipeline(steps)

        scores = []
        for train_idx, test_idx in folds:
            # Slice out both partitions first, then fit and evaluate.
            fit_docs, fit_labels = text[train_idx], sent[train_idx]
            eval_docs, eval_labels = text[test_idx], sent[test_idx]

            pipeline.fit(fit_docs, fit_labels)
            scores.append(pipeline.score(eval_docs, eval_labels))

        score = sum(scores) / len(scores)
        print ("scores ", scores, " Score ", score)
        return score
开发者ID:jeevananne,项目名称:evolveML,代码行数:29,代码来源:KaggleMovieSentAnalysis.py

示例3: MachineLearning

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
class MachineLearning(object):
    """Text classifier wrapping a TF-IDF (1- and 2-gram) + MultinomialNB pipeline.

    Training samples are accumulated one at a time with
    ``add_training_data`` and then fitted in one go with ``train``.
    """

    def __init__(self):
        # Initialize classifier and vectorizer
        self.clf = Pipeline([('tfidf', TfidfVectorizer(min_df=1, ngram_range=(1, 2))),
                             ('clf', MultinomialNB(alpha=.01)),
                            ])
        # Create the (empty) training buffers up front so that
        # add_training_data() works on a fresh instance instead of raising
        # AttributeError when init_training() was not called first.
        self.init_training()

    def init_training(self):
        """Discard any accumulated samples and start with empty buffers."""
        self.x_train = []
        self.y_train = []

    def add_training_data(self, data, label):
        """Append one sample (``data``) and its ``label`` to the buffers."""
        self.x_train.append(data)
        self.y_train.append(label)

    # Train classifier
    # Can also use grid search to optimize accuracy, like
    #
    #   parameters = {'tfidf__ngram_range': [(1, 1), (1, 2)],
    #                 'clf__alpha': (.01, .001),
    #   }
    #   gs_clf = GridSearchCV(clf, parameters, n_jobs=-1)
    def train(self):
        """Fit the pipeline on everything accumulated so far."""
        self.clf.fit(self.x_train, self.y_train)

    # Predict result
    # We can roughly estimate the accuracy using cross validation, like
    #
    #   result = clf.predict(test_dc + test_marvel)
    #   baseline = [0 for x in range(len(test_dc))] + [1 for x in range(len(test_marvel))]
    #   print np.sum(result == baseline) / float(len(result))
    def predict(self, data):
        """Return the predicted label for a single sample ``data``."""
        # The pipeline expects a collection of samples, so wrap and unwrap.
        return self.clf.predict([data])[0]
开发者ID:skimmilk8888,项目名称:SuperHero,代码行数:37,代码来源:train.py

示例4: test

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def test():
    target_label = [u'weather', u'audio',u'pic',u'calculate',u'music', u'poem']
    training_text_raw = []
    training_label = []
    with open ('./training_source.csv','r') as f:
        for line in f.readlines():
            line = line.strip().split('\t')
            if len(line) > 1 and line[1] in target_label:
                training_text_raw.append(unicode(line[0],"utf-8"))
                training_label.append(line[1])
        print training_label

        training_text = []
    for text in training_text_raw:
        seg_text = seg(text)
        training_text.append(seg_text)
    text_clf = Pipeline([('vect', CountVectorizer()),
                     ('tfidf', TfidfTransformer(use_idf=False)),

                     ('clf', MultinomialNB()),
])

    scores = cross_validation.cross_val_score(text_clf, training_text, training_label, cv=8)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    text_clf.fit(training_text, training_label)

    while True:
        k_text = raw_input("\nPlease input:")
        if k_text == "exit":
            break
        print text_clf.predict([seg(unicode(k_text,'utf-8'))])
开发者ID:jaean1993,项目名称:flight01,代码行数:34,代码来源:MultinomialNB.py

示例5: Regressor

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
class Regressor(BaseEstimator):
    """Random-forest regressor on spatially agglomerated, standardised grids.

    Inputs are 4-D arrays unpacked as (n_samples, n_lags, n_lats, n_lons).
    Each lag slice is standardised, reduced with ``FeatureAgglomeration``
    and the per-sample features are then fed to the random forest.
    """

    def __init__(self):
        self.clf = Pipeline([
            ("RF", RandomForestRegressor(n_estimators=200, max_depth=15,
                                         n_jobs=N_JOBS))])
        self.scaler = StandardScaler()
        self.agglo = FeatureAgglomeration(n_clusters=500)

    def fit(self, X, y):
        """Fit scaler, agglomeration and forest; return ``self``.

        Returning ``self`` follows the scikit-learn estimator convention and
        allows call chaining; the method used to return ``None``.
        """
        y = y.ravel()
        n_samples, n_lags, n_lats, n_lons = X.shape
        # The scaler statistics come from the last lag only.
        self.scaler.fit(X[:, -1].reshape(n_samples, -1))
        # One row per (sample, lag) so each grid is transformed independently.
        X = X.reshape(n_lags * n_samples, -1)
        # Restrict agglomeration to neighbouring cells of the lat/lon grid.
        connectivity = grid_to_graph(n_lats, n_lons)
        self.agglo.connectivity = connectivity
        X = self.scaler.transform(X)
        X = self.agglo.fit_transform(X)
        # Back to one row per sample, with all lags concatenated.
        X = X.reshape(n_samples, -1)
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        """Apply the fitted scaler and agglomeration, then predict."""
        n_samples, n_lags, n_lats, n_lons = X.shape
        X = X.reshape(n_lags * n_samples, -1)
        X = self.scaler.transform(X)
        X = self.agglo.transform(X)
        X = X.reshape(n_samples, -1)
        return self.clf.predict(X)
开发者ID:agramfort,项目名称:el_nino_ramp,代码行数:29,代码来源:regressor.py

示例6: clasificador

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
    def clasificador(self,X_train, y_train, X_test, target_names, y_test,all_labels):
        """Fit a multi-label text classifier and score its test predictions.

        Training labels are binarised with ``MultiLabelBinarizer``; the
        pipeline is count vectoriser -> TF-IDF -> dense conversion ->
        one-vs-rest GaussianNB. Predictions are mapped back to label lists
        and passed to ``self.macro`` and ``self.micro``.

        NOTE(review): ``target_names`` and ``all_labels`` are not used, and
        the macro/micro results are computed but not returned.
        """
        lb = preprocessing.MultiLabelBinarizer()
        Y = lb.fit_transform(y_train)

        pasos = [
            ('vectorizer',CountVectorizer(strip_accents='unicode')),
            ('tfidf',TfidfTransformer()),
            ('to_dense', DenseTransformer()),
            ('clf',OneVsRestClassifier(GaussianNB())),
        ]
        classifier = Pipeline(pasos)
        classifier.fit(X_train,Y)

        predicted = classifier.predict(X_test)
        etiquetas = lb.inverse_transform(predicted)

        # Convert each label collection to a list, in place.
        for posicion, grupo in enumerate(etiquetas):
            etiquetas[posicion] = list(grupo)

        valoresMacro = self.macro(etiquetas,y_test)
        valoresMicro = self.micro(etiquetas, y_test)
开发者ID:josearcosaneas,项目名称:RepositorioPara-la-entrega-del-TFG,代码行数:30,代码来源:resumen+mas+extractoGB.py

示例7: svcDictVector

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def svcDictVector():
    recipeData = getRecipeData()
    
    labels = [recipe['cuisine'] for recipe in recipeData]
    ingredientsFixtures = [sorted(set(e['ingredients'])) for e in recipeData]
    for i, w in enumerate(ingredientsFixtures):
        ingredientsFixtures[i] = dict(zip(w, [1] * len(w)))        
                
    pipeline = Pipeline([
        ('dict', DictVectorizer()),
        ('variance', VarianceThreshold()),        
        ('tfidf', TfidfTransformer()),
        ('bayes', svm.LinearSVC()),
    ])    
    
    pipeline.fit(ingredientsFixtures, labels)
    print pipeline
    
    testRecipes = getTestData()    
    testIngredientsFixtures = [sorted(set(e['ingredients'])) for e in testRecipes]
    for i, w in enumerate(testIngredientsFixtures):
        testIngredientsFixtures[i] = dict(zip(w, [1] * len(w)))
        
    predictions = pipeline.predict(testIngredientsFixtures)    
    outputPercentCorrect(predictions)     
    copyAndOutput(predictions, testRecipes)
开发者ID:adatta02,项目名称:whats-cooking,代码行数:28,代码来源:fit.py

示例8: useTFIDF

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def useTFIDF():
    print "TFIDF"
    trainData = pd.read_csv("data/multinomialTrain.csv", header=0)
    # dat = trainData[["rating", 'numDet', 'innerPunctuation','avgWordLength',
    #                       'numPresVerb',  "numFirstPerson",'numPropNoun', "numOtherNoun", "numWords", "numAdj",
    #                        "numPastVerb", "numConj", "exclamationPoints"]]
    dat = trainData


    knn = KNeighborsClassifier(n_neighbors=21, weights='distance')
    scaler = preprocessing.StandardScaler()
    tfidf = TfidfTransformer()
    tfidf_scaled_knn = Pipeline([('tfidf', tfidf), ('knn', knn)])

    kf = KFold(len(trainData), n_folds=3, shuffle=True)
    for train, test in kf:
        trainX, trainy = transform_sklearn_dictionary(transform_csv(dat.iloc[train], target_col="rating",
                                                                    ignore_cols=["01v234", "2v34", "words","words_nostopwords",
                                                                     "review", 'numDet', 'innerPunctuation','avgWordLength','numPresVerb',  "numFirstPerson",'numPropNoun', "numOtherNoun", "numWords", "numAdj",
                                                                     "numPastVerb", "numConj", "exclamationPoints"]))
        testX, testy = transform_sklearn_dictionary(transform_csv(dat.iloc[test], target_col="rating",
                                                                  ignore_cols=["01v234", "2v34", "words","words_nostopwords",
                                                                     "review", 'numDet', 'innerPunctuation','avgWordLength','numPresVerb',  "numFirstPerson",'numPropNoun', "numOtherNoun", "numWords", "numAdj",
                                                                     "numPastVerb", "numConj", "exclamationPoints"]))
        tfidf_scaled_knn.fit(trainX, trainy)
        print tfidf_scaled_knn.score(testX, testy)
开发者ID:decodyng,项目名称:mlgroup5,代码行数:28,代码来源:knn.py

示例9: main

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def main():
    """Train a TF-IDF + one-vs-rest LinearSVC question-type classifier and save it.

    ``import_files(filenames)`` yields a category -> sentences mapping. It is
    inverted to sentence -> categories (skipping the 'yn' category), the
    classifier is fitted on all sentences and written via
    ``save_classifier(classifier, outfile)``.
    """
    data = import_files(filenames)

    # invert the dictionary: sentence -> list of categories it appears in
    sentences = defaultdict(list)
    for cat in data:
        if cat != 'yn':
            for sentence in data[cat]:
                sentences[sentence].append(cat)

    X_list = list(sentences)
    y_data = [sentences[s] for s in X_list]
    X_data = np.array(X_list)

    # No hold-out split: everything is used for training.
    X_train, y_train = X_data, y_data

    classifier = Pipeline([
        ('vectorizer', TfidfVectorizer()),
        ('clf', OneVsRestClassifier(LinearSVC())),
    ])
    classifier.fit(X_train, y_train)

    save_classifier(classifier, outfile)
开发者ID:vincom2,项目名称:11411-project,代码行数:29,代码来源:train_qntype.py

示例10: Classifier

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
class Classifier(BaseEstimator):
    """AdaBoost over random forests, wrapped in a single-step pipeline.

    ``rf_max_depth`` / ``rf_n_estimators`` / ``n_jobs`` configure the base
    random forest; ``n_estimators`` is the number of boosting rounds.
    """

    def __init__(self, rf_max_depth=10, rf_n_estimators=50, n_estimators=50, n_jobs=1):
        self.rf_max_depth = rf_max_depth
        self.rf_n_estimators = rf_n_estimators
        self.n_estimators = n_estimators
        self.n_jobs = n_jobs

    def fit(self, X, y):
        """Build the boosted forest from the stored parameters and fit it."""
        forest = RandomForestClassifier(
            max_depth=self.rf_max_depth,
            n_estimators=self.rf_n_estimators,
            n_jobs=self.n_jobs,
        )
        boosted = AdaBoostClassifier(
            base_estimator=forest,
            n_estimators=self.n_estimators,
        )
        self.clf = Pipeline([('rf', boosted)])
        self.clf.fit(X, y)
        return self

    def predict(self, X):
        """Delegate to the fitted pipeline's ``predict``."""
        return self.clf.predict(X)

    def predict_proba(self, X):
        """Delegate to the fitted pipeline's ``predict_proba``."""
        return self.clf.predict_proba(X)
开发者ID:agramfort,项目名称:variable_stars_hackaton,代码行数:27,代码来源:classifier.py

示例11: allFeatureClassify

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def allFeatureClassify(cosine=False):
    print "AllFeatureClassifier"
    if cosine:
        print "Cosine"
    trainData = pd.read_csv("data/multinomialTrain.csv", header=0)
    # dat = trainData[["rating", 'numDet', 'innerPunctuation','avgWordLength',
    #                       'numPresVerb',  "numFirstPerson",'numPropNoun', "numOtherNoun", "numWords", "numAdj",
    #                        "numPastVerb", "numConj", "exclamationPoints"]]
    dat = trainData


    if cosine:
        knn = KNeighborsClassifier(n_neighbors=21, metric=pairwise.cosine_similarity)
    else:
        knn = KNeighborsClassifier(n_neighbors=21)
    scaler = preprocessing.StandardScaler()
    scaled_knn = Pipeline([('scaler', scaler), ('knn', knn)])

    kf = KFold(len(trainData), n_folds=3, shuffle=True)
    for train, test in kf:
        trainX, trainy = transform_sklearn_dictionary(transform_csv(dat.iloc[train], target_col="rating",
                                                                    ignore_cols=["01v234", "2v34", "words",
                                                                                 "words_nostopwords", "review"]))
        testX, testy = transform_sklearn_dictionary(transform_csv(dat.iloc[test], target_col="rating",
                                                                  ignore_cols=["01v234", "2v34", "words",
                                                                                 "words_nostopwords", "review"]))
        scaled_knn.fit(trainX, trainy)
        print scaled_knn.score(testX, testy)
开发者ID:decodyng,项目名称:mlgroup5,代码行数:30,代码来源:knn.py

示例12: cross_validation

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
 def cross_validation(self, X, Y, n_folds=10):
     """ n-fold cross validation to get the best classifier.

     A fresh CountVectorizer -> TF-IDF -> LinearSVC pipeline is trained per
     fold; the one with the highest validation accuracy is kept.

     Returns a tuple ``(best_classifier, training_accuracy, best_accuracy)``
     where ``training_accuracy`` is the accuracy of the best classifier on
     its own training partition.
     """
     kf = KFold(len(X), n_folds=n_folds)
     # Defined up front so the return statement cannot hit an unbound name.
     best_classifier = None
     best_accuracy = -1
     training_accuracy = 0
     for train, cv in kf:
         classifier = Pipeline([('vect', CountVectorizer()),
                                ('tfidf', TfidfTransformer()),
                                ('svm', LinearSVC(C=1))])
         # forms the training and test set; the validation fold is the
         # contiguous index range [first, last].
         first, last = cv[0], cv[-1]
         # Training data is everything outside that range.  The tail must
         # start at last + 1: starting at ``last`` leaked the final
         # validation sample into the training set.
         X_train = list(X[:first]) + list(X[last + 1:])
         Y_train = list(Y[:first]) + list(Y[last + 1:])
         X_cv = X[first:last + 1]
         Y_cv = Y[first:last + 1]
         classifier.fit(X_train, Y_train)
         accuracy = self.__accuracy(classifier, X_cv, Y_cv)
         if accuracy > best_accuracy:
             best_classifier = classifier
             best_accuracy = accuracy
             training_accuracy = self.__accuracy(
                 classifier, X_train, Y_train)
     return best_classifier, training_accuracy, best_accuracy
开发者ID:name3anad,项目名称:SVMDemo,代码行数:28,代码来源:classifier2.py

示例13: run

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def run(training, validation, k, config=None):
    """Train a one-vs-rest SGD text classifier and collect ranking metrics.

    Documents are loaded with ``load_data`` (sharing one ``Index`` of class
    labels between both sets), the pipeline is fitted on the training set,
    and for every validation document the classes are ranked by predicted
    probability and scored against the reference labels.

    Returns ``(isError, OneError, nDocs, margins, AP)``: two accumulated
    error totals, the number of validation documents, and the per-document
    margins and average precisions.

    NOTE(review): ``k`` and ``config`` are not used in this function.
    """
    error_total, one_error_total, doc_total = 0, 0, 0
    margins, avg_precisions = [], []

    class_index = Index()
    traindocs, train_X, train_y = zip(*load_data(training, class_index))
    testdocs, test_X, test_y = zip(*load_data(validation, class_index))

    # NOTE(review): computed but never passed on; the classifier below uses
    # a fixed n_iter=50.
    n_iter = np.ceil(10**6 / len(traindocs))

    clf = SGDClassifier(alpha=.000001, loss='log', n_iter=50, penalty='elasticnet')

    steps = [
        ('vectorizer', CountVectorizer(min_df=1, max_df=1.0, analyzer=lambda t: t)),
        ('tfidf', TfidfTransformer(norm='l2')),
        ('clf', OneVsRestClassifier(clf, n_jobs=-1)),
    ]
    classifier = Pipeline(steps)
    classifier.fit(train_X, train_y)

    predictions = classifier.predict_proba(test_X)
    for prediction, gold in zip(predictions, test_y):
        doc_total += 1
        # NOTE(review): this indicator vector is built and then discarded;
        # only the set of reference labels below is used for scoring.
        indicator = np.zeros(len(prediction))
        indicator[list(gold)] = 1
        # Class indices ordered from most to least probable.
        ranking = sorted(range(len(prediction)), key=lambda i: prediction[i], reverse=True)
        refs = set(gold)
        ap = average_precision(ranking, refs)
        avg_precisions.append(ap)
        error_total += is_error(ap)
        one_error_total += one_error(ranking, refs)
        margins.append(margin(ranking, refs))
    return error_total, one_error_total, doc_total, margins, avg_precisions
开发者ID:fbkarsdorp,项目名称:MotifRetrieval,代码行数:34,代码来源:sgd.py

示例14: pipeline_test

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
def pipeline_test(params, data_path, dataset):
    """Fit the pipeline described by ``params`` and return its test error in percent.

    ``train.arff`` and ``test.arff`` are read from ``data_path/dataset``.
    The rescaling and feature-preprocessing steps are optional and only
    added when their factory returns something other than ``None``. The
    result is ``100 - 100 * accuracy`` on the test set.
    """
    def load_split(file_name):
        # Expand "~" so user-relative data paths work.
        location = os.path.expanduser(os.path.join(data_path, dataset, file_name))
        return load_arff_data(location)

    X_train, y_train = load_split('train.arff')
    X_test, y_test = load_split('test.arff')

    # Order matters: balancing may replace ``params`` before the remaining
    # components are built from it.
    dpr = get_data_preprocessor_rescaling(params)
    params = get_data_preprocessor_balancing(params, y_train)
    fp = get_feature_preprocessor(params)
    clf = get_classifier(params)

    optional_steps = [
        ('data_preprocessor_rescaling', dpr),
        ('feature_preprocessor', fp),
    ]
    steps = [(label, step) for label, step in optional_steps if step is not None]
    steps.append(('classifier', clf))

    ppl = Pipeline(steps)
    ppl.fit(X_train, y_train)
    score = accuracy_score(y_test, ppl.predict(X_test))

    return 100.0 - 100.0 * score
开发者ID:yuyuz,项目名称:FLASH,代码行数:28,代码来源:ml_framework.py

示例15: Model10

# 需要导入模块: from sklearn.pipeline import Pipeline [as 别名]
# 或者: from sklearn.pipeline.Pipeline import fit [as 别名]
class Model10(Model):
  def __init__(self):
    pass
  def fit(self, Xmask, y):
    pr = prepare.Prepare_0(model=10, preproc=1, min_df=1, use_svd=False, tfidf=2,
        stemmer=0)
    (X_all_df,_,BP,params) = pr.load_transform(update=False)
    names = list(X_all_df.columns)
    X_all = np.asarray(X_all_df)
    self.X_all, self.names = X_all, names

    clf0 = GaussianNB()
    clf1 = MultinomialNB(alpha=0.8)
    clf2 = BernoulliNB(alpha=1, binarize=0.01)

    clf = clf1
    self.rd = Pipeline([
        ("trans", Transformer(names=self.names, X_all=X_all, BP=BP)),
        #("scaler",StandardScaler(with_mean=False)), 
        ("est", clf)
        ])

    self.rd.fit(Xmask,np.asarray(y))
    return self
  def predict_proba(self, Xmask):
    return self.rd.predict_proba(Xmask)
  def predict(self, Xmask):
    return self.rd.predict(Xmask)
  
  def starter(self):
    print "Model10 starter"
    self.fit(np.arange(100),np.arange(100))
开发者ID:orazaro,项目名称:stumbleupon_kaggle,代码行数:34,代码来源:model10.py


注:本文中的sklearn.pipeline.Pipeline.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。