This article collects typical usage examples of the Python method sklearn.preprocessing.Binarizer.transform. If you are wondering exactly what Binarizer.transform does, how to call it, or what real code that uses it looks like, the curated examples below may help. You can also explore further usage examples of its containing class, sklearn.preprocessing.Binarizer.
The listing below shows 15 code examples of Binarizer.transform, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
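Before turning to the collected examples, here is a minimal, self-contained sketch of what Binarizer.transform does (the toy matrix and the threshold value below are invented purely for illustration and are not taken from any of the examples): every value strictly greater than threshold maps to 1, everything else to 0.

import numpy as np
from sklearn.preprocessing import Binarizer

X = np.array([[1.0, -0.5, 2.0],
              [0.0,  3.0, -1.0]])

# Binarizer is stateless, so fit() learns nothing; it exists for pipeline compatibility.
binarizer = Binarizer(threshold=0.5).fit(X)
print(binarizer.transform(X))
# [[1. 0. 1.]
#  [0. 1. 0.]]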
Example 1: test_binarizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def test_binarizer():
    X_ = np.array([[1, 0, 5], [2, 3, 0]])
    for init in (np.array, sp.csr_matrix):
        X = init(X_.copy())

        binarizer = Binarizer(threshold=2.0, copy=True)
        X_bin = toarray(binarizer.transform(X))
        assert_equal(np.sum(X_bin == 0), 4)
        assert_equal(np.sum(X_bin == 1), 2)

        binarizer = Binarizer(copy=True).fit(X)
        X_bin = toarray(binarizer.transform(X))
        assert X_bin is not X
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        binarizer = Binarizer(copy=True)
        X_bin = binarizer.transform(X)
        assert X_bin is not X
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        binarizer = Binarizer(copy=False)
        X_bin = binarizer.transform(X)
        assert X_bin is X
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)
Example 2: cv_mean_std_array
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def cv_mean_std_array(X, y, alphas, ks, n_a, n_k, cv=20):
    n = n_a * n_k  # one entry per (alpha, k) combination
    cv_mean = np.empty(n)
    cv_std = np.empty(n)
    regressors = pd.DataFrame()
    binarizer = Binarizer(threshold=1400)
    y_binary = binarizer.transform(y).transpose().ravel()
    itt_counter = 0
    print 'size n_a: %d n_k: %d' % (n_a, n_k)
    for i in range(0, n_a):
        print 'reg. column : %d' % (i * n_k)
        temp_string = 'alpha=%f' % alphas[i * n_k]
        print temp_string
        print regressors.shape
        df_temp = pd.DataFrame()
        print 'computing for alpha = %f' % (alphas[n_k * i])
        X_lasso, df_temp[temp_string] = df_Lasso(X, y, alphas[i * n_k])
        regressors = pd.concat([regressors, df_temp], ignore_index=True, axis=1)
        for j in range(0, n_k):
            print 'i:%d, j:%d' % (i, j)
            print 'computing for alpha = %f and k = %f' % (alphas[n_k * i + j], ks[n_k * i + j])
            print 'X_lasso shape:'
            print X_lasso.shape
            cv_mean[n_k * i + j], cv_std[n_k * i + j] = knn_cv_mean_and_std(X_lasso, y_binary, alphas[n_k * i + j], ks[n_k * i + j], cv=cv)
            itt_counter = itt_counter + 1
            print 'completed %dth iteration of knn cv mean:%f std:%f, at pos:%d' % (itt_counter, cv_mean[n_k * i + j], cv_std[n_k * i + j], n_k * i + j)
    return cv_mean, cv_std, regressors
Example 3: initialize
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def initialize():
    images, labels = load_mnist_data()
    binarizer = Binarizer().fit(images)
    images_binarized = binarizer.transform(images)
    knn = KNeighborsClassifier(n_neighbors=3, metric='jaccard')
    knn.fit(images_binarized, labels)
    return knn
Example 4: cv_mean_std_array
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def cv_mean_std_array(X, y, alphas, n_a, cv=20):
    binarizer = Binarizer(threshold=1400)
    y_binary = binarizer.transform(y).transpose().ravel()
    cv_ols_means, cv_ols_stds, cv_lasso_means, cv_lasso_stds, cv_ridge_means, cv_ridge_stds = np.empty(n_a), np.empty(n_a), np.empty(n_a), np.empty(n_a), np.empty(n_a), np.empty(n_a)
    for i in range(0, n_a):
        print 'computing for alpha=%f' % alphas[i]
        # score OLS, Lasso and Ridge against the binarized target
        cv_ols_means[i], cv_ols_stds[i], cv_lasso_means[i], cv_lasso_stds[i], cv_ridge_means[i], cv_ridge_stds[i] = lm_cv_mean_and_std(X, y_binary, alphas[i])
        print 'successfully computed iteration %d' % i
    return cv_ols_means, cv_ols_stds, cv_lasso_means, cv_lasso_stds, cv_ridge_means, cv_ridge_stds
Example 5: test_binarizer_vs_sklearn
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def test_binarizer_vs_sklearn():
    # Compare msmbuilder.preprocessing.Binarizer
    # with sklearn.preprocessing.Binarizer
    binarizerr = BinarizerR()
    binarizerr.fit(np.concatenate(trajs))

    binarizer = Binarizer()
    binarizer.fit(trajs)

    y_ref1 = binarizerr.transform(trajs[0])
    y1 = binarizer.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Example 6: test_binarizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def test_binarizer():
    X_ = np.array([[1, 0, 5], [2, 3, -1]])

    for init in (np.array, list, sparse.csr_matrix, sparse.csc_matrix):

        X = init(X_.copy())

        binarizer = Binarizer(threshold=2.0, copy=True)
        X_bin = toarray(binarizer.transform(X))
        assert_equal(np.sum(X_bin == 0), 4)
        assert_equal(np.sum(X_bin == 1), 2)
        X_bin = binarizer.transform(X)
        assert_equal(sparse.issparse(X), sparse.issparse(X_bin))

        binarizer = Binarizer(copy=True).fit(X)
        X_bin = toarray(binarizer.transform(X))
        assert_true(X_bin is not X)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        binarizer = Binarizer(copy=True)
        X_bin = binarizer.transform(X)
        assert_true(X_bin is not X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

        binarizer = Binarizer(copy=False)
        X_bin = binarizer.transform(X)
        if init is not list:
            assert_true(X_bin is X)
        X_bin = toarray(X_bin)
        assert_equal(np.sum(X_bin == 0), 2)
        assert_equal(np.sum(X_bin == 1), 4)

    binarizer = Binarizer(threshold=-0.5, copy=True)
    for init in (np.array, list):
        X = init(X_.copy())

        X_bin = toarray(binarizer.transform(X))
        assert_equal(np.sum(X_bin == 0), 1)
        assert_equal(np.sum(X_bin == 1), 5)
        X_bin = binarizer.transform(X)

    # Cannot use threshold < 0 for sparse
    assert_raises(ValueError, binarizer.transform, sparse.csc_matrix(X))
Example 7: by_threshold
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
def by_threshold(self, threshold=0.0):
    bin = Skbin(threshold).fit(self.M)
    return bin.transform(self.M)
Example 8: Binarizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
# binarization
from sklearn.preprocessing import Binarizer
import pandas
import numpy
url = "https://goo.gl/vhm1eU"
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv(url, names=names)
array = dataframe.values
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
binarizer = Binarizer(threshold=0.0).fit(X)
binaryX = binarizer.transform(X)
# summarize transformed data
numpy.set_printoptions(precision=3)
print(binaryX[0:5,:])
Example 9: Binarizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
# In[3]:
# Import csv data
raw_data = pd.read_csv('OnlineNewsPopularity_wLabels_deleteNoise.csv').iloc[:, 1:] # read in csv, omit the first column of url
raw_data = raw_data.iloc[:, :-1]
news_data = raw_data.iloc[:, :-1] # Take up to the second last column
news_labels = raw_data.iloc[:, -1] # Take shares column for labels
# Binarize
print '\nBinary Threshold:'
binary_threshold = np.median(raw_data[' shares'])
news_data = news_data.drop(' n_non_stop_words', 1)
print binary_threshold
binarizer = Binarizer(threshold=binary_threshold)
y_binary = binarizer.transform(news_labels).transpose().ravel()
# In[ ]:
# Discretize
# In[25]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier()
print 'Decision Tree Classifier Accuracy Rate'
tree_score = cross_val_score(tree, news_data, y_binary, cv=10)
np.mean(tree_score)
Example 10: OneHotEncoder
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
from sklearn.preprocessing import Binarizer, LabelEncoder, OneHotEncoder
onehot_encoder = OneHotEncoder()
label_encoder = LabelEncoder()
x = ['a', 'b', 'c']
label_x = label_encoder.fit_transform(x).reshape([len(x), 1])
print(label_x)
print(onehot_encoder.fit_transform(label_x).toarray())
binarizer = Binarizer(threshold=1.0).fit(label_x)
print(binarizer.transform(label_x))
Example 11: zeros
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
varSizeStatisticsTrain = zeros(numCombinations, dtype=float)
varSizeStatisticsTest = zeros(numCombinations, dtype=float)
a = 0
mnist = fetch_mldata('MNIST original')
# split a training set and a test set
y_train, y_test = mnist.target[:60000], mnist.target[60000:70000]
#vectorizer = CountVectorizer(binary=True)
X_both = mnist.data
binarizer = Binarizer(threshold=50).fit(X_both)  # fit() only takes the data; the pixel cutoff belongs in the constructor
X_both = binarizer.transform(X_both)
X_train = X_both[:60000]
X_test = X_both[60000:70000]
#print X_train[1]
#ch2 = SelectKBest(chi2, 750)
#X_train = ch2.fit_transform(X_train, y_train)
#X_test = ch2.transform(X_test)
data_train = X_train
m,n = data_train.shape
print m," ",n
Example 12: Binarizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
# # Binarization
# In[6]:
watched = np.array(popsong_df['listen_count'])
watched[watched >= 1] = 1
popsong_df['watched'] = watched
popsong_df.head(10)
# In[7]:
from sklearn.preprocessing import Binarizer
bn = Binarizer(threshold=0.9)
pd_watched = bn.transform([popsong_df['listen_count']])[0]
popsong_df['pd_watched'] = pd_watched
popsong_df.head(11)
# # Rounding
# In[8]:
items_popularity = pd.read_csv('datasets/item_popularity.csv', encoding='utf-8')
items_popularity
# In[9]:
items_popularity['popularity_scale_10'] = np.array(np.round((items_popularity['pop_percent'] * 10)), dtype='int')
Example 13: DictVectorizer
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
news_labels = extracted_data[' shares'] # Take shares column for labels
# Data Preprocessing
news_data_transpose = news_data.transpose()
data_into_dict = news_data_transpose.to_dict()
list_data = [v for k, v in data_into_dict.iteritems()]
# Encode
from sklearn.feature_extraction import DictVectorizer
dv = DictVectorizer()
transformed_data = dv.fit_transform(list_data).toarray()
# Label Encoder - Binarization
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=1400) # Threshold at 1400 because median of shares is 1400
transformed_labels = binarizer.transform(news_labels)
transformed_labels = transformed_labels.transpose().ravel() # .ravel() is to fix "Too many array indices error"
# Could be a scikit or pandas bug
############## Classification #################
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
# Decision Tree Classifier
tree = DecisionTreeClassifier()
knn = KNeighborsClassifier()
gnb = GaussianNB()
# lr = LinearRegression()
Example 14: train_test_split
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
X = (news_data * lasso_est.transpose()) # multiply element wise with lasso estimate
df_Lasso = X[X.columns[(X != 0).any()]] # remove columns where all elements are zero
print df_Lasso.shape # number of columns should significantly shrink depending on choice of alpha
df_Lasso.columns.values.tolist()
# In[104]:
#obtain a split
# from sklearn.cross_validation import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(df_Lasso, news_labels)
#binarize
from sklearn.preprocessing import Binarizer
binarizer = Binarizer(threshold=binary_threshold)
binary_labels = binarizer.transform(news_labels).transpose().ravel() # .ravel() is to fix "Too many array indices error"
print binary_labels.shape
# In[107]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cross_validation import cross_val_score
knn = KNeighborsClassifier(n_neighbors=1) # arbitrary k
cv = cross_val_score(knn, df_Lasso, binary_labels, cv=10)
print "Cross Validation Scores"
print cv
print 'Mean Cross Validation Score'
print np.mean(cv)
Author: AveryLiu, Project: Data-Mining, Lines of code: 32, Source file: Obtaining+significant+regressors+using+a+Lasso+Regression+(1).py
Example 15: ngram
# Required import: from sklearn.preprocessing import Binarizer [as alias]
# Or: from sklearn.preprocessing.Binarizer import transform [as alias]
# Comment section below out if you already have made pickle files
#
#---------------------------------------------------------------------------------------
all_bigr = ngram(X_train, 'bigram') #starting with all features
print "Starting counting bigrams..."
X_train_bi_counted = count(X_train, all_bigr, 'bigram')
print "Done counting train set"
X_test_bi_counted = count(X_test, all_bigr, 'bigram')
print "Done counting test set"
print "Binarizing and dumping files"
bin = Binarizer()
X_train_bi_binary = bin.fit_transform(X_train_bi_counted)
X_test_bi_binary = bin.transform(X_test_bi_counted)
pickle.dump(X_train_bi_binary, open( "X_train_bi_binary.p", "wb" ) )
pickle.dump(X_test_bi_binary, open( "X_test_bi_binary.p", "wb" ) )
print "Done"
print "Starting tfidf vectors..."
X_train_bi_tfidf, X_test_bi_tfidf = tfidf(X_train_bi_counted, X_test_bi_counted)
pickle.dump(X_train_bi_tfidf, open( "X_train_bi_tfidf.p", "wb" ) )
pickle.dump(X_test_bi_tfidf, open( "X_test_bi_tfidf.p", "wb" ) )
print "Done"
print "Starting feature selection using CART random forests on binary files"
indices_important_feats_bi_bin = tree(X_train_bi_binary, y_train, all_bigr, 'Bigram_binary')
pickle.dump(indices_important_feats_bi_bin, open( "indices_important_feats_bi_bin.p", "wb" ) )