当前位置: 首页>>代码示例>>Python>>正文


Python Binarizer.transform方法代码示例

本文整理汇总了Python中sklearn.preprocessing.Binarizer.transform方法的典型用法代码示例。如果您正苦于以下问题:Python Binarizer.transform方法的具体用法?Python Binarizer.transform怎么用?Python Binarizer.transform使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.preprocessing.Binarizer的用法示例。


在下文中一共展示了Binarizer.transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_binarizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def test_binarizer():
    """Exercise Binarizer.transform on dense and CSR input.

    For each container type the test checks the number of 0/1 entries
    produced with an explicit threshold of 2.0 and with the default
    threshold, and checks that ``copy=True`` returns a new object while
    ``copy=False`` returns the very object that was passed in.
    """
    base = np.array([[1, 0, 5], [2, 3, 0]])

    def check_counts(dense, n_zeros, n_ones):
        # Count the zero and one entries of a dense binarized result.
        assert_equal(np.sum(dense == 0), n_zeros)
        assert_equal(np.sum(dense == 1), n_ones)

    for make in (np.array, sp.csr_matrix):
        data = make(base.copy())

        # Explicit threshold: the asserted counts are 4 zeros and 2 ones.
        thresholded = Binarizer(threshold=2.0, copy=True).transform(data)
        check_counts(toarray(thresholded), 4, 2)

        # Default threshold on a fitted estimator.
        fitted = Binarizer(copy=True).fit(data)
        dense = toarray(fitted.transform(data))
        assert dense is not data
        check_counts(dense, 2, 4)

        # Default threshold without calling fit(); the result must be a copy.
        result = Binarizer(copy=True).transform(data)
        assert result is not data
        check_counts(toarray(result), 2, 4)

        # copy=False must hand back the input object itself.
        result = Binarizer(copy=False).transform(data)
        assert result is data
        check_counts(toarray(result), 2, 4)
开发者ID:Yangqing,项目名称:scikit-learn,代码行数:33,代码来源:test_preprocessing.py

示例2: cv_mean_std_array

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def cv_mean_std_array(X, y, alphas, ks, n_a, n_k, cv=20):
    """Collect kNN cross-validation statistics over a grid of alphas and ks.

    For each of the ``n_a`` alpha groups a Lasso-reduced design matrix is
    obtained from ``df_Lasso``; for each of the ``n_k`` entries inside the
    group ``knn_cv_mean_and_std`` is evaluated against the binarized labels.

    Parameters
    ----------
    X : feature data, forwarded unchanged to ``df_Lasso``.
    y : labels; binarized at a threshold of 1400 for the kNN step, and
        forwarded unchanged to ``df_Lasso``.
    alphas, ks : flat sequences indexed as ``n_k * i + j``.
    n_a, n_k : number of alpha groups and number of entries per group.
    cv : forwarded to ``knn_cv_mean_and_std``.

    Returns
    -------
    (cv_mean, cv_std, regressors)
        Two arrays of length ``n_a * n_k`` and a DataFrame with one column
        per alpha group, taken from the second value returned by ``df_Lasso``.
    """
    # The original referred to undefined names n_alphas / n_ks; the grid
    # dimensions are the n_a / n_k parameters.
    n = n_a * n_k
    cv_mean = np.empty(n)
    cv_std = np.empty(n)
    regressors = pd.DataFrame()

    binarizer = Binarizer(threshold=1400)
    # Recent scikit-learn releases reject 1-D input, so promote y to a single
    # row first; 2-D input passes through np.atleast_2d unchanged.
    # NOTE(review): assumes y is a dense array-like -- confirm against callers.
    y_binary = binarizer.transform(np.atleast_2d(y)).transpose().ravel()

    itt_counter = 0
    print('size n_a: %d n_k: %d' % (n_a, n_k))
    for i in range(n_a):
        group_start = i * n_k
        print('reg. column : %d' % group_start)
        temp_string = 'alpha=%f' % alphas[group_start]
        print(temp_string)
        print(regressors.shape)
        df_temp = pd.DataFrame()
        print('computing for alpha = %f' % (alphas[group_start]))
        X_lasso, df_temp[temp_string] = df_Lasso(X, y, alphas[group_start])
        regressors = pd.concat([regressors, df_temp], ignore_index=True, axis=1)
        for j in range(n_k):
            pos = group_start + j
            print('i:%d, j:%d' % (i, j))
            print('computing for alpha = %f and k = %f' % (alphas[pos], ks[pos]))
            print('X_lasso shape:')
            print(X_lasso.shape)
            cv_mean[pos], cv_std[pos] = knn_cv_mean_and_std(
                X_lasso, y_binary, alphas[pos], ks[pos], cv=cv)
            itt_counter = itt_counter + 1
            print('completed %dth iteration of knn cv mean:%f std:%f, at pos:%d'
                  % (itt_counter, cv_mean[pos], cv_std[pos], pos))
    return cv_mean, cv_std, regressors
开发者ID:AveryLiu,项目名称:Data-Mining,代码行数:31,代码来源:kNN-iterator.py

示例3: initialize

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def initialize():
    """Build a 3-nearest-neighbour classifier on binarized images.

    The images and labels come from ``load_mnist_data()``. The images are
    binarized with a default ``Binarizer`` and the classifier is fitted on
    the result using the Jaccard metric.

    Returns the fitted ``KNeighborsClassifier``.
    """
    pixels, targets = load_mnist_data()

    thresholder = Binarizer().fit(pixels)
    binary_pixels = thresholder.transform(pixels)

    classifier = KNeighborsClassifier(n_neighbors=3, metric='jaccard')
    classifier.fit(binary_pixels, targets)
    return classifier
开发者ID:mikokm,项目名称:DigitGuesser,代码行数:12,代码来源:classifiers.py

示例4: cv_mean_std_array

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def cv_mean_std_array(X, y, alphas, n_a, cv=20):
    """Collect OLS / Lasso / Ridge cross-validation statistics per alpha.

    Parameters
    ----------
    X : feature data, forwarded to ``lm_cv_mean_and_std``.
    y : labels; binarized at a threshold of 1400 before use.
    alphas : sequence of regularisation strengths; the first ``n_a`` are used.
    n_a : number of alphas to evaluate.
    cv : accepted for interface compatibility; this function does not
        forward it (the original did not either).

    Returns
    -------
    Six arrays of length ``n_a``: means and standard deviations for OLS,
    Lasso and Ridge, in that order.
    """
    binarizer = Binarizer(threshold=1400)
    # Recent scikit-learn releases reject 1-D input, so promote y to a single
    # row first; 2-D input passes through np.atleast_2d unchanged.
    y_binary = binarizer.transform(np.atleast_2d(y)).transpose().ravel()

    (cv_ols_means, cv_ols_stds,
     cv_lasso_means, cv_lasso_stds,
     cv_ridge_means, cv_ridge_stds) = [np.empty(n_a) for _ in range(6)]

    for i in range(n_a):
        print('computing for alpha=%f' % alphas[i])
        # The original call was `lm_cv_mean_and_std(X, , alphas[i])`, a syntax
        # error with the label argument missing.
        # NOTE(review): y_binary is assumed to be the intended argument, since
        # it is computed above and otherwise unused -- confirm it is not raw y.
        (cv_ols_means[i], cv_ols_stds[i],
         cv_lasso_means[i], cv_lasso_stds[i],
         cv_ridge_means[i], cv_ridge_stds[i]) = lm_cv_mean_and_std(X, y_binary, alphas[i])
        print('successfully computed iteration %d' % i)
    return cv_ols_means, cv_ols_stds, cv_lasso_means, cv_lasso_stds, cv_ridge_means, cv_ridge_stds
开发者ID:AveryLiu,项目名称:Data-Mining,代码行数:12,代码来源:linear-models-iterator.py

示例5: test_binarizer_vs_sklearn

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def test_binarizer_vs_sklearn():
    """Check that the two Binarizer implementations agree on the first trajectory.

    ``BinarizerR`` is fitted on the concatenation of ``trajs`` and applied to
    ``trajs[0]``; ``Binarizer`` is fitted on the list of trajectories and the
    first element of its output is compared with the reference.
    (Per the original comment these are the msmbuilder and scikit-learn
    binarizers; presumably ``BinarizerR`` is the scikit-learn one.)
    """
    reference = BinarizerR()
    reference.fit(np.concatenate(trajs))

    candidate = Binarizer()
    candidate.fit(trajs)

    expected = reference.transform(trajs[0])
    actual = candidate.transform(trajs)[0]

    np.testing.assert_array_almost_equal(expected, actual)
开发者ID:Eigenstate,项目名称:msmbuilder,代码行数:16,代码来源:test_preprocessing.py

示例6: test_binarizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
def test_binarizer():
    """Exercise Binarizer.transform on arrays, lists and sparse matrices.

    Covers an explicit positive threshold, the default threshold (fitted and
    unfitted), the ``copy`` flag, preservation of sparsity, a negative
    threshold on dense input, and the ValueError raised for a negative
    threshold on sparse input.
    """
    raw = np.array([[1, 0, 5], [2, 3, -1]])

    def expect_counts(dense, n_zeros, n_ones):
        # Compare the number of 0 and 1 entries in a dense result.
        assert_equal(np.sum(dense == 0), n_zeros)
        assert_equal(np.sum(dense == 1), n_ones)

    for make in (np.array, list, sparse.csr_matrix, sparse.csc_matrix):
        data = make(raw.copy())

        high = Binarizer(threshold=2.0, copy=True)
        expect_counts(toarray(high.transform(data)), 4, 2)
        # The output must be sparse exactly when the input is sparse.
        out = high.transform(data)
        assert_equal(sparse.issparse(data), sparse.issparse(out))

        fitted = Binarizer(copy=True).fit(data)
        dense = toarray(fitted.transform(data))
        assert_true(dense is not data)
        expect_counts(dense, 2, 4)

        out = Binarizer(copy=True).transform(data)
        assert_true(out is not data)
        expect_counts(toarray(out), 2, 4)

        out = Binarizer(copy=False).transform(data)
        # The identity check is skipped for list input.
        if make is not list:
            assert_true(out is data)
        expect_counts(toarray(out), 2, 4)

    negative = Binarizer(threshold=-0.5, copy=True)
    for make in (np.array, list):
        data = make(raw.copy())

        expect_counts(toarray(negative.transform(data)), 1, 5)
        negative.transform(data)

    # Cannot use threshold < 0 for sparse
    assert_raises(ValueError, negative.transform, sparse.csc_matrix(data))
开发者ID:abouaziz,项目名称:scikit-learn,代码行数:48,代码来源:test_preprocessing.py

示例7: by_threshold

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
	def by_threshold(self, threshold=0.0):
		"""Return ``self.M`` transformed by ``Skbin`` built with ``threshold``."""
		fitted = Skbin(threshold).fit(self.M)
		binarized = fitted.transform(self.M)
		return binarized
开发者ID:makgyver,项目名称:pyros,代码行数:5,代码来源:binarizer.py

示例8: Binarizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
# Binarize the feature columns of a CSV data set and show the first rows.
import numpy
import pandas
from sklearn.preprocessing import Binarizer

url = "https://goo.gl/vhm1eU"
names = [
    'preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class',
]
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values

# The first eight columns are the inputs, the ninth is the output.
X = array[:, :8]
Y = array[:, 8]

binarizer = Binarizer(threshold=0.0)
binarizer.fit(X)
binaryX = binarizer.transform(X)

# Print the first five transformed rows with three decimals.
numpy.set_printoptions(precision=3)
print(binaryX[:5])
开发者ID:philmcc,项目名称:machinelearning,代码行数:18,代码来源:binarize_data.py

示例9: Binarizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
# In[3]:

# Import csv data
raw_data = pd.read_csv('OnlineNewsPopularity_wLabels_deleteNoise.csv').iloc[:, 1:]      # read in csv, omit the first column of url
raw_data = raw_data.iloc[:, :-1]       # drop the last column of the file
news_data = raw_data.iloc[:, :-1]      # Take up to the second last column
news_labels = raw_data.iloc[:, -1]      # Take shares column for labels

# Binarize
print('\nBinary Threshold:')
binary_threshold = np.median(raw_data[' shares'])
# `axis` is passed by keyword: newer pandas no longer accepts it positionally.
news_data = news_data.drop(' n_non_stop_words', axis=1)
print(binary_threshold)
binarizer = Binarizer(threshold=binary_threshold)
# Recent scikit-learn releases reject 1-D input, so the label column is
# promoted to a single row before transforming and flattened afterwards.
y_binary = binarizer.transform(np.atleast_2d(news_labels)).transpose().ravel()


# In[ ]:

# Discretize


# In[25]:

# Decision Tree
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier()
print('Decision Tree Classifier Accuracy Rate')
tree_score = cross_val_score(tree, news_data, y_binary, cv=10)
# Print the mean explicitly so the value also appears when run as a script;
# as a bare expression it is only displayed inside a notebook cell.
print(np.mean(tree_score))
开发者ID:AveryLiu,项目名称:Data-Mining,代码行数:32,代码来源:DecisionTree&NB.py

示例10: OneHotEncoder

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
from sklearn.preprocessing import Binarizer, LabelEncoder, OneHotEncoder

# Demonstrates three encoders on a tiny list of string categories.
onehot_encoder = OneHotEncoder()
label_encoder = LabelEncoder()

x = list('abc')

# Encode the labels as integers and reshape them into a single column.
label_x = label_encoder.fit_transform(x).reshape(len(x), 1)
print(label_x)

# One-hot encode the integer column and print it as a dense array.
onehot_x = onehot_encoder.fit_transform(label_x)
print(onehot_x.toarray())

# Binarize the integer column at a threshold of 1.0.
binarizer = Binarizer(threshold=1.0)
binarizer.fit(label_x)
print(binarizer.transform(label_x))
开发者ID:yaochitc,项目名称:learning_libraries,代码行数:15,代码来源:features.py

示例11: zeros

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
varSizeStatisticsTrain = zeros(numCombinations, dtype=float)
varSizeStatisticsTest = zeros(numCombinations, dtype=float)

a = 0

# NOTE(review): fetch_mldata was removed from scikit-learn 0.22; this line
# only works with older releases.
mnist = fetch_mldata('MNIST original')

# split a training set and a test set
y_train, y_test = mnist.target[:60000], mnist.target[60000:70000]

#vectorizer = CountVectorizer(binary=True)
X_both = mnist.data

# The original called Binarizer().fit(50, X_both), which passes 50 as the
# data and X_both as the ignored `y`, leaving the threshold at its default.
# NOTE(review): 50 is assumed to be the intended threshold -- confirm.
binarizer = Binarizer(threshold=50).fit(X_both)
X_both = binarizer.transform(X_both)

X_train = X_both[:60000]
X_test =  X_both[60000:70000]

#print X_train[1]

#ch2 = SelectKBest(chi2, 750)
#X_train = ch2.fit_transform(X_train, y_train)
#X_test = ch2.transform(X_test)

data_train = X_train
m,n = data_train.shape

# Same text as the Python 2 statement `print m," ",n` (three spaces between
# the two numbers), written so that it also runs on Python 3.
print("%s   %s" % (m, n))
开发者ID:mniepert,项目名称:exchangeable-variable-models,代码行数:31,代码来源:loadMNIST-all-st.py

示例12: Binarizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
# # Binarization

# In[6]:

# Manual binarization. np.array() copies the column by default, so the
# in-place assignment below does not modify popsong_df['listen_count'].
watched = np.array(popsong_df['listen_count']) 
# Every count of 1 or more becomes 1; smaller values are left unchanged.
watched[watched >= 1] = 1
popsong_df['watched'] = watched
popsong_df.head(10)


# In[7]:

from sklearn.preprocessing import Binarizer

# The same transformation using scikit-learn's Binarizer with a 0.9 threshold.
bn = Binarizer(threshold=0.9)
# The column is wrapped in a list so that transform() receives a single-row
# 2-D input; [0] takes that one row back out of the result.
pd_watched = bn.transform([popsong_df['listen_count']])[0]
popsong_df['pd_watched'] = pd_watched
popsong_df.head(11)


# # Rounding

# In[8]:

items_popularity = pd.read_csv('datasets/item_popularity.csv', encoding='utf-8')
items_popularity


# In[9]:

# Multiply pop_percent by 10, round, and store the result as integers.
items_popularity['popularity_scale_10'] = np.array(np.round((items_popularity['pop_percent'] * 10)), dtype='int')
开发者ID:Zoery,项目名称:practical-machine-learning-with-python,代码行数:33,代码来源:feature_engineering_numeric.py

示例13: DictVectorizer

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
news_labels = extracted_data[' shares']      # Take shares column for labels

# Data Preprocessing
# Transposing first makes to_dict() produce one {column: value} dict per row.
news_data_transpose = news_data.transpose()
data_into_dict = news_data_transpose.to_dict()
# dict.iteritems() exists only on Python 2; values() yields the same row
# dicts in the same order on both Python 2 and 3.
list_data = list(data_into_dict.values())

# Encode
from sklearn.feature_extraction import DictVectorizer
dv = DictVectorizer()
transformed_data = dv.fit_transform(list_data).toarray()

# Label Encoder - Binarization
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=1400)                           # Threshold at 1400 because median of shares is 1400
# Recent scikit-learn releases reject 1-D input, so the label column is
# wrapped in a list to form a single-row 2-D input.
# NOTE(review): assumes news_labels is a 1-D pandas Series -- confirm.
transformed_labels = binarizer.transform([news_labels])
transformed_labels = transformed_labels.transpose().ravel()     # flatten the single-row result back to 1-D

############## Classification #################

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC

# Classifiers: decision tree, k-nearest neighbours and Gaussian naive Bayes
tree = DecisionTreeClassifier()
knn = KNeighborsClassifier()
gnb = GaussianNB()
# lr = LinearRegression()
开发者ID:AveryLiu,项目名称:Data-Mining,代码行数:33,代码来源:Data_Preprocessing_Script.py

示例14: train_test_split

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
X = (news_data * lasso_est.transpose()) # multiply element wise with lasso estimate
df_Lasso = X[X.columns[(X != 0).any()]] # remove columns where all elements are zero
print(df_Lasso.shape) # number of columns should significantly shrink depending on choice of alpha
df_Lasso.columns.values.tolist()


# In[104]:

#obtain a split
# from sklearn.cross_validation import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(df_Lasso, news_labels)

#binarize
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=binary_threshold)
# Recent scikit-learn releases reject 1-D input, so the labels are promoted
# to a single row before transforming and flattened again afterwards.
binary_labels = binarizer.transform(np.atleast_2d(news_labels)).transpose().ravel()
print(binary_labels.shape)


# In[107]:

from sklearn.neighbors import KNeighborsClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; fall back to it
# only on releases that predate sklearn.model_selection.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score

knn = KNeighborsClassifier(n_neighbors=1) # arbitrary k
cv = cross_val_score(knn, df_Lasso, binary_labels, cv=10)
print("Cross Validation Scores")
print(cv)
print('Mean Cross Validation Score')
print(np.mean(cv))
开发者ID:AveryLiu,项目名称:Data-Mining,代码行数:32,代码来源:Obtaining+significant+regressors+using+a+Lasso+Regression+(1).py

示例15: ngram

# 需要导入模块: from sklearn.preprocessing import Binarizer [as 别名]
# 注意: transform 是 Binarizer 实例的方法, 无法单独导入; 请先创建实例再调用, 例如 Binarizer().transform(X)
#	Comment section below out if you already have made pickle files
#
#---------------------------------------------------------------------------------------

def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file even if dumping fails."""
    # The original passed open(...) straight to pickle.dump and never closed
    # the handle, leaving flushing and closing to the garbage collector.
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


all_bigr = ngram(X_train, 'bigram') #starting with all features

print("Starting counting bigrams...")
X_train_bi_counted = count(X_train, all_bigr, 'bigram')
print("Done counting train set")
X_test_bi_counted = count(X_test, all_bigr, 'bigram')
print("Done counting test set")

print("Binarizing and dumping files")
# NOTE: `bin` shadows the builtin; the name is kept because code outside this
# section may refer to it.
bin = Binarizer()
X_train_bi_binary = bin.fit_transform(X_train_bi_counted)
X_test_bi_binary = bin.transform(X_test_bi_counted)
_dump_pickle(X_train_bi_binary, "X_train_bi_binary.p")
_dump_pickle(X_test_bi_binary, "X_test_bi_binary.p")
print("Done")


print("Starting tfidf vectors...")
X_train_bi_tfidf, X_test_bi_tfidf = tfidf(X_train_bi_counted, X_test_bi_counted)
_dump_pickle(X_train_bi_tfidf, "X_train_bi_tfidf.p")
_dump_pickle(X_test_bi_tfidf, "X_test_bi_tfidf.p")
print("Done")


print("Starting feature selection using CART random forests on binary files")
indices_important_feats_bi_bin = tree(X_train_bi_binary, y_train, all_bigr, 'Bigram_binary')
_dump_pickle(indices_important_feats_bi_bin, "indices_important_feats_bi_bin.p")
开发者ID:MariaBarrett,项目名称:LPIIExam,代码行数:33,代码来源:ngram.py


注:本文中的sklearn.preprocessing.Binarizer.transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。