

Python Normalizer.fit Method Code Examples

This article collects typical usage examples of the Python method sklearn.preprocessing.Normalizer.fit. If you have been wondering what exactly Normalizer.fit does, how to call it, or what working code that uses it looks like, the hand-picked examples below should help. You can also explore further usage examples of the containing class, sklearn.preprocessing.Normalizer.


Ten code examples of Normalizer.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
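Before the examples, here is a minimal sketch (with made-up data) of the pattern they all share. One point worth noting: sklearn's Normalizer is stateless, so fit learns nothing from the data; transform independently rescales each sample (row) to unit norm.

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[3.0, 4.0],
              [1.0, 1.0]])

normalizer = Normalizer()  # norm='l2' by default
normalizer.fit(X)          # stateless: fit only validates the input

print(normalizer.transform(X))
# [[0.6        0.8       ]
#  [0.70710678 0.70710678]]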

Example 1: TfIdf

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
from itertools import chain
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import Normalizer
# Feature is the project's own base class (defined elsewhere in the repository)
class TfIdf(Feature):
    def __init__(self):
        self.kbest = None
        self.vect = None
        self.truncated = None
        self.normalizer = None

    def train(self, reviews, labels):
        self.vect = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')

        reviews_text = [' '.join(list(chain.from_iterable(review))) for review in reviews]
        tfidf_matrix = self.vect.fit_transform(reviews_text).toarray()

        self.truncated = TruncatedSVD(n_components=50)
        self.truncated.fit(tfidf_matrix, labels)

        trunc = self.truncated.transform(tfidf_matrix)
        self.normalizer = Normalizer()
        self.normalizer.fit(trunc)

        self.kbest = SelectKBest(f_classif, k=5)
        self.kbest.fit(self.normalizer.transform(trunc), labels)

    def score(self, data):
        reviews_text = ' '.join(list(chain.from_iterable(data)))
        tfidf_matrix = self.vect.transform([reviews_text]).toarray()

        trunc = self.truncated.transform(tfidf_matrix)

        return tuple(self.kbest.transform(self.normalizer.transform(trunc))[0, :])
Developer: EdwardBetts, Project: Yulp, Lines: 32, Source: tfidf.py
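The train/score pair above is essentially the classic LSA feature chain (tf-idf, SVD, normalization, feature selection). As a hedged aside, the same chain could roughly be expressed with sklearn's Pipeline; reviews_text and labels below stand in for the pre-joined review strings and targets from the snippet:

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer
from sklearn.feature_selection import SelectKBest, f_classif

# Rough sketch of the same steps as a single estimator
lsa_features = Pipeline([
    ('tfidf', TfidfVectorizer(analyzer='word', ngram_range=(1, 2), stop_words='english')),
    ('svd', TruncatedSVD(n_components=50)),
    ('norm', Normalizer()),
    ('kbest', SelectKBest(f_classif, k=5)),
])
# features = lsa_features.fit_transform(reviews_text, labels)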

Example 2: ScikitNormalizer

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
from sklearn.preprocessing import Normalizer
class ScikitNormalizer(object):
    def __init__(self):
        self.data_normalizer = Normalizer()

    def fit(self, data):
        self.data_normalizer.fit(data)

    def transform(self, data):
        return (self.data_normalizer.transform(data) + 1) / 2
Developer: Falgunithakor, Project: SummerResearchDE-BPSO, Lines: 11, Source: Normalizer.py
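Since every component of an L2-normalized row lies in [-1, 1], the (x + 1) / 2 shift maps the output into [0, 1]. A quick usage sketch with made-up data, assuming the ScikitNormalizer class above:

import numpy as np

sn = ScikitNormalizer()
X = np.array([[-3.0, 4.0]])  # L2-normalizes to [-0.6, 0.8]
sn.fit(X)
print(sn.transform(X))       # [[0.2 0.9]] after the (x + 1) / 2 shift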

Example 3: KNN

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
import numpy
from sklearn import neighbors
from sklearn.preprocessing import Normalizer
# Model is the project's own base class (defined elsewhere in the repository)
class KNN(Model):

    def __init__(self, X_train, y_train, X_val, y_val):
        super().__init__()
        self.normalizer = Normalizer()
        self.normalizer.fit(X_train)
        self.clf = neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance', p=1)
        self.clf.fit(self.normalizer.transform(X_train), numpy.log(y_train))
        print("Result on validation data: ", self.evaluate(self.normalizer.transform(X_val), y_val))

    def guess(self, feature):
        return numpy.exp(self.clf.predict(self.normalizer.transform(feature)))
Developer: codeaudit, Project: entity-embedding-rossmann, Lines: 14, Source: models.py
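The numpy.log / numpy.exp pair around the target is a manual target transform. As a hedged alternative sketch (assuming sklearn 0.20+), the same idea can be written with TransformedTargetRegressor, which applies the transform on fit and inverts it on predict:

import numpy as np
from sklearn import neighbors
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

# Normalize X inside the pipeline; log(y) on fit, exp on predict
model = TransformedTargetRegressor(
    regressor=make_pipeline(
        Normalizer(),
        neighbors.KNeighborsRegressor(n_neighbors=10, weights='distance', p=1)),
    func=np.log, inverse_func=np.exp)
# model.fit(X_train, y_train); model.predict(X_val)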

Example 4: test_normalizer_vs_sklearn

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
import numpy as np
from sklearn.preprocessing import Normalizer as NormalizerR
from msmbuilder.preprocessing import Normalizer
# trajs is a test fixture: a list of trajectory arrays defined in the test module
def test_normalizer_vs_sklearn():
    # Compare msmbuilder.preprocessing.Normalizer
    # with sklearn.preprocessing.Normalizer

    normalizerr = NormalizerR()
    normalizerr.fit(np.concatenate(trajs))

    normalizer = Normalizer()
    normalizer.fit(trajs)

    y_ref1 = normalizerr.transform(trajs[0])
    y1 = normalizer.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Developer: Eigenstate, Project: msmbuilder, Lines: 16, Source: test_preprocessing.py

Example 5: test_sklearn_transform

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
import json
import numpy as np
import pandas as pd
from sklearn.preprocessing import Normalizer
# SklearnTransform, ComputationContext and Record come from the project under
# test; X_train and X_test are fixtures defined elsewhere in the module
def test_sklearn_transform():
    transformer = Normalizer()
    transformer.fit(X_train)

    computation = SklearnTransform("test-sklearn", transformer,
                                   istreams=[], ostream="out")
    context = ComputationContext(computation)

    data = pd.DataFrame(X_test).to_json(orient="records")
    computation.process_record(context, Record("transform", data, None))

    assert len(context.records) == 1
    assert len(context.records["out"]) == 1

    record = context.records["out"][0]
    assert record.key == "transform"
    assert np.allclose(transformer.transform(X_test), json.loads(record.data))
Developer: concord, Project: ml, Lines: 19, Source: test_sklearn.py

Example 6: genfromtxt

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
from numpy import genfromtxt
from sklearn import cross_validation
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.externals import joblib
from grid_search import grid_estimation

# load the matrix of text features and assigned clusters
all_data = genfromtxt('features_and_clusters.csv', delimiter=',')

data = all_data[:, 0:29]
target = all_data[:, 29]

# normalization and scaling of data
normalizer = Normalizer()
normalizer.fit(data)
data = normalizer.transform(data)
scaler = StandardScaler()
data = scaler.fit_transform(data)

# split into training and test sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(data, target, test_size=0.4, random_state=0)

#clf = svm.SVC(kernel="rbf", gamma=0.001, C=1000).fit(X_train, y_train)
clf = svm.SVC(kernel="linear", gamma=1.0, C=1).fit(X_train, y_train)

# save the classifier, scaler and normalizer
joblib.dump(clf, 'classifier_data\\model.pkl')
joblib.dump(scaler, 'classifier_data\\scaler.pkl')
joblib.dump(normalizer, 'classifier_data\\normalizer.pkl')
Developer: Askinkaty, Project: text-readability, Lines: 32, Source: classifier.py
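Presumably the saved artifacts are meant to be loaded back and applied in the same order at prediction time: normalize, then scale, then classify. A hedged sketch, assuming the same file layout and sklearn version:

from sklearn.externals import joblib

clf = joblib.load('classifier_data\\model.pkl')
scaler = joblib.load('classifier_data\\scaler.pkl')
normalizer = joblib.load('classifier_data\\normalizer.pkl')

# new_data: array with the same 29 feature columns used in training
# predictions = clf.predict(scaler.transform(normalizer.transform(new_data)))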

Example 7: range

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
# (fragment; it also relies on numpy as np and keras' Sequential, Dense and Adam)
    # Append new features
    newAct_train = np.zeros((activation_train.shape[0], activation_train.shape[1]+3))
    for i in range(activation_train.shape[0]):
        newAct_train[i] = np.append(activation_train[i], pttImg_sample_train[i][:3])

    newAct_valid = np.zeros((activation_valid.shape[0], activation_valid.shape[1]+3))
    for i in range(activation_valid.shape[0]):
        newAct_valid[i] = np.append(activation_valid[i], valid_pttImg[i][:3])

    newAct_test = np.zeros((activation_test.shape[0], activation_test.shape[1]+3))
    for i in range(activation_test.shape[0]):
        newAct_test[i] = np.append(activation_test[i], test_blogImg[i][:3])
    # Normalize
    normalizer = Normalizer()
    normalizer.fit(newAct_train)
    newAct_train = normalizer.transform(newAct_train)
    newAct_valid = normalizer.transform(newAct_valid)
    newAct_test = normalizer.transform(newAct_test)

    # Final model
    model3 = Sequential()
    model3.add(Dense(2, input_shape=(newAct_train.shape[1],), activation='softmax'))
    adam = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model3.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    print(model3.summary())
    model3.fit(newAct_train, y_train_sample, epochs=epochs3, batch_size=batch_size)

    # Evaluate on validation or test data
    print("Valid:")
    scores = model3.evaluate(newAct_valid, y_valid_sample, verbose=0)
Developer: chipyaya, Project: Projects_exp, Lines: 32, Source: cnn_pretrain_sen_avg_img.py
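The same evaluation call would presumably be repeated on the held-out test split; a sketch extending the snippet above, where y_test_sample is a hypothetical name mirroring the validation labels:

    print("Test:")
    scores = model3.evaluate(newAct_test, y_test_sample, verbose=0)
    print("loss: %.4f, acc: %.4f" % (scores[0], scores[1]))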

Example 8: LabelEncoder

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
# (fragment; data_np, n_lin, le and the preprocess_* helpers are defined earlier
# in the script)
data_np[:, [5]] = le.transform(np.ravel(data_np[:, [5]])).reshape(n_lin, 1)

# Encode the labels in column 11
le2 = LabelEncoder()
le2.fit(np.ravel(data_np[:, [10]]))
#print le2.classes_
data_np[:, [10]] = le2.transform(np.ravel(data_np[:, [10]])).reshape(n_lin, 1)

# Replace missing values with 0 in columns 16 and 17
data_np = preprocess_replace_NaN(data_np, [15, 16], 'nan')

# plot_NA_ratio_features(data_np, feature_names)

# Normalize the dataset for columns 5, 6, 7, 10, 11, 13, 14, 17 and 25
nor = Normalizer(norm='l1')
nor.fit(data_np[:, [4, 5, 6, 9, 10, 12, 13, 16, 24]].astype(np.float64))
# [0, 1, 2, 6, 11, 17, 18, 19, 20, 21, 22, 23]
data_np[:, [4, 5, 6, 9, 10, 12, 13, 16, 24]] = \
	nor.transform(data_np[:, [4, 5, 6, 9, 10, 12, 13, 16, 24]].astype(np.float64))

# Replace missing values for the risk_factor using an SVM classifier
preprocess_missing_risk_factor(data_np)

# plot_pourcentage_result(data_np, feature_names, [17, 18, 19, 20, 21 ,22, 23])

# plot_NA_ratio_features(data_np, feature_names)


################################################################################

# # Replace all missing values for the column 12, 16 and 17 with the median value
Developer: ravediamond, Project: Kaggle_Allstate_Purchase_Prediction_Challenge, Lines: 33, Source: SVM.py
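For reference, norm='l1' divides each row by the sum of its absolute values (rather than by its Euclidean length, as the default norm='l2' does). A tiny illustration with made-up numbers:

import numpy as np
from sklearn.preprocessing import Normalizer

demo = np.array([[1.0, 2.0, 7.0]])
print(Normalizer(norm='l1').fit(demo).transform(demo))
# [[0.1 0.2 0.7]] -- each row divided by |1| + |2| + |7| = 10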

Example 9: SVC

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]
# (fragment; X, y, X_train, X_test, y_train, y_test and best_score are defined
# earlier in the script)
for C in np.arange(0.05, 2, 0.05):
    for gamma in np.arange(0.001, 0.1, 0.001):

        svc = SVC(C=C, gamma=gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
        if score > best_score:
            best_score = score
            print("C, gamma, score", C, gamma, score)




# normalizer
norm = Normalizer()
norm.fit(X)
T = norm.transform(X)

X_train, X_test, y_train, y_test = train_test_split(T, y, test_size=0.3, random_state=7)

for C in np.arange(0.05, 2, 0.05):
    for gamma in np.arange(0.001, 0.1, 0.001):

        svc = SVC(C=C, gamma=gamma)
        svc.fit(X_train, y_train)
        score = svc.score(X_test, y_test)
        if score > best_score:
            best_score = score
            print("C, gamma, score", C, gamma, score)

#maxabs
Developer: BioNinja, Project: qRT-PCR, Lines: 33, Source: assigment3.py
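The nested loops above implement a manual grid search. As a hedged alternative sketch (assuming sklearn 0.18+ with model_selection), GridSearchCV with a Pipeline would run the same search with cross-validation and keep the normalization inside each fold:

import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC

search = GridSearchCV(
    Pipeline([('norm', Normalizer()), ('svc', SVC())]),
    param_grid={'svc__C': np.arange(0.05, 2, 0.05),
                'svc__gamma': np.arange(0.001, 0.1, 0.001)},
    cv=5)
# search.fit(X, y); print(search.best_params_, search.best_score_)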

Example 10: main

# Required import: from sklearn.preprocessing import Normalizer [as alias]
# Or: from sklearn.preprocessing.Normalizer import fit [as alias]

#......... part of the code omitted here .........
        )
    
    tfidf13 = CountVectorizer(max_features=5200, strip_accents='unicode',  
        analyzer='word', binary=True,
        ngram_range=(1, 3),tokenizer = SnowballTokenizer())
    
    vectorizers = [tfidf1,tfidf2,tfidf3,tfidf4,tfidf5,tfidf6, tfidf7,tfidf8,tfidf9,tfidf10,tfidf11,tfidf12,tfidf13]
    #vectorizers = [tfidf1,tfidf3,tfidf5,tfidf6]
    #vectorizers = [tfidf1]  
      
    #comment = 'full, SnowballTokenizer no RF'
    use_lsa = 0
    cv_split = 0.2
    n = int(np.round(len(t['tweet'].tolist())))
    train_end = int(np.round(n*(1-cv_split)))
    cv_beginning = int(np.round(n * ((1 - cv_split) if cv_split > 0 else 0.8)))
    y = np.array(t.ix[:,4:])
    
    train = t['tweet'].tolist()[0:train_end]
    cv_X_original = np.array(t['tweet'].tolist()[cv_beginning:])
    cv_y = np.array(y[cv_beginning:])
        
    if cv_split == 0:
        train = t['tweet'].tolist()
    else:
        y = y[0:int(np.round(len(t['tweet'].tolist())*(1-cv_split)))]   
    
    prediction_grand_all = 0
    predict_cv_grand_all = 0
    list_predictions = []
    list_predictions_test = []
    for tfid in vectorizers:
        print('fitting vectorizer...')
        tfid.fit(t['tweet'].tolist() + t2['tweet'].tolist())
        print('transforming train set...')
        X = tfid.transform(train)
        print('transforming cv set...')
        cv_X = tfid.transform(cv_X_original)
        print('transforming test set...')
        test = tfid.transform(t2['tweet'])
        
        clf1 = MultiTaskLasso()
        clf2 = AdaBoostRegressor(learning_rate = 1,n_estimators = 10)
        clf3 = RandomForestRegressor(max_depth = 20, n_estimators = 36, max_features = 100, n_jobs = 6)
        clf4 = Ridge()       
       
        clfs = [clf4, clf3]
        lsa_classifier = [0, 1]
        prediction_all = 0
        predict_cv_all = 0
        for clf, use_lsa in zip(clfs,lsa_classifier):            
            if use_lsa == 1:
                lsa = TruncatedSVD(n_components = 100)
                print('fitting lsa...')
                lsa.fit(X, y)
                print('transforming with lsa...')
                X = lsa.transform(X)
                cv_X = lsa.transform(cv_X)
                test = lsa.transform(test)
                print('normalizing...')
                norm = Normalizer()
                norm.fit(X, y)
                X = norm.transform(X, copy=False)
                test = norm.transform(test, copy=False)
                cv_X = norm.transform(cv_X, copy=False)
Developer: ANB2, Project: crowdflower, Lines: 70, Source: crowdflower_sklearn.py


Note: The sklearn.preprocessing.Normalizer.fit examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. For distribution and use, please refer to each project's license. Do not reproduce without permission.