This article collects typical usage examples of the Python method sklearn.cross_validation.train_test_split. If you are unsure how cross_validation.train_test_split is used in practice, the curated code examples below should help. You can also explore further usage examples of its containing module, sklearn.cross_validation.
The following presents 15 code examples of cross_validation.train_test_split, sorted by popularity by default.
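Note that the sklearn.cross_validation module was deprecated in scikit-learn 0.18 and removed entirely in 0.20; the same function now lives in sklearn.model_selection with an unchanged signature. A minimal sketch of the modern import and call:

# Modern equivalent of the deprecated import (scikit-learn >= 0.18)
import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(20).reshape(10, 2)  # toy feature matrix
y = np.arange(10)                 # toy labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)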
Example 1: split_to_test_and_train
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_to_test_and_train(data, labels, entities, test_size=DEFAULT_TEST_SIZE):
    d_train, d_test, l_train, l_test, c_train, c_test = train_test_split(
        data, labels, entities, test_size=test_size)
    # Drop test items whose data value also appears in the training set
    # (relevant when `data` contains duplicates).
    d_test_2 = []
    l_test_2 = []
    c_test_2 = []
    train_dict = {}
    for d in d_train:
        train_dict[d] = 1
    for d, l, c in zip(d_test, l_test, c_test):
        if d in train_dict:  # Python 3: dict.has_key() is gone
            continue
        d_test_2.append(d)
        l_test_2.append(l)
        c_test_2.append(c)
    return (d_train, d_test_2, l_train, l_test_2, c_train, c_test_2)

# utility to extract entities from preprocessed files
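A hedged usage sketch with toy inputs (DEFAULT_TEST_SIZE and all data values here are hypothetical, not from the source):

# Hypothetical toy call; note the duplicate 'a' in data
data = ['a', 'b', 'a', 'c', 'd', 'e', 'f', 'g']
labels = [0, 1, 0, 1, 0, 1, 0, 1]
entities = ['e1', 'e2', 'e1', 'e3', 'e4', 'e5', 'e6', 'e7']
d_tr, d_te, l_tr, l_te, c_tr, c_te = split_to_test_and_train(
    data, labels, entities, test_size=0.25)
# any test item whose data value also landed in d_tr has been dropped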
Example 2: split_train_test
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_train_test(n_classes):
    from sklearn.datasets import load_digits
    n_labeled = 5
    digits = load_digits(n_class=n_classes)  # load only the first n_classes digit classes
    X = digits.data
    y = digits.target
    print(np.shape(X))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Re-draw the split until the first n_labeled training labels
    # cover every class (an active-learning seed set needs them all).
    while len(np.unique(y_train[:n_labeled])) < n_classes:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33)
    trn_ds = Dataset(X_train, np.concatenate(
        [y_train[:n_labeled], [None] * (len(y_train) - n_labeled)]))
    tst_ds = Dataset(X_test, y_test)
    return trn_ds, tst_ds, digits
Example 3: fit
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def fit(self, X, y):
    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=self.test_size,
                         random_state=self.random_state)
    dim = X_train.shape[1]
    self.indices_ = tuple(range(dim))
    self.subsets_ = [self.indices_]
    score = self._calc_score(X_train, y_train,
                             X_test, y_test, self.indices_)
    self.scores_ = [score]
    # Sequential backward selection: drop one feature at a time,
    # keeping the subset that scores best, until k_features remain.
    while dim > self.k_features:
        scores = []
        subsets = []
        for p in combinations(self.indices_, r=dim - 1):
            score = self._calc_score(X_train, y_train,
                                     X_test, y_test, p)
            scores.append(score)
            subsets.append(p)
        best = np.argmax(scores)
        self.indices_ = subsets[best]
        self.subsets_.append(self.indices_)
        dim -= 1
        self.scores_.append(scores[best])
    self.k_score_ = self.scores_[-1]
    return self
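The search step relies on itertools.combinations to enumerate every (dim-1)-sized feature subset; a minimal self-contained illustration of that mechanic:

from itertools import combinations

indices = tuple(range(4))
for p in combinations(indices, r=3):
    print(p)  # (0, 1, 2), (0, 1, 3), (0, 2, 3), (1, 2, 3)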
Example 4: main
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
    # Load dataset.
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # You can define your configurations by providing a RunConfig object to
    # the estimator to control session configurations, e.g. num_cores
    # and gpu_memory_fraction.
    run_config = tf.contrib.learn.estimators.RunConfig(
        num_cores=3, gpu_memory_fraction=0.6)
    # Build a 3-layer DNN with 10, 20, 10 units respectively.
    feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
        x_train)
    classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                                hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                config=run_config)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    predictions = list(classifier.predict(x_test, as_iterable=True))
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))
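Examples 4-6 target TensorFlow 1.x; tf.contrib (including tf.contrib.learn) was removed in TensorFlow 2.x. For reference, a rough modern Keras equivalent of the same 3-layer iris classifier (a sketch, not the original repo's code):

import tensorflow as tf  # TensorFlow 2.x
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=100, verbose=0)
print('Accuracy:', model.evaluate(x_test, y_test, verbose=0)[1])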
Example 5: main
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # Build a 3-layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(
        feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    predictions = list(classifier.predict(x_test, as_iterable=True))
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))
Example 6: main
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main(unused_argv):
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    feature_columns = tf.contrib.learn.infer_real_valued_columns_from_input(
        x_train)
    # optimizer_exp_decay is a helper defined elsewhere in the original file.
    classifier = tf.contrib.learn.DNNClassifier(feature_columns=feature_columns,
                                                hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                optimizer=optimizer_exp_decay)
    classifier.fit(x_train, y_train, steps=800)
    predictions = list(classifier.predict(x_test, as_iterable=True))
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))
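The optimizer_exp_decay helper is not shown here; in the original TensorFlow 1.x example it builds an optimizer whose learning rate decays exponentially with the global step. A reconstructed sketch using TF 1.x APIs (an assumption, not verbatim from the source):

def optimizer_exp_decay():
    # Learning rate decays exponentially as training progresses.
    global_step = tf.contrib.framework.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(
        learning_rate=0.1, global_step=global_step,
        decay_steps=100, decay_rate=0.001)
    return tf.train.AdagradOptimizer(learning_rate=learning_rate)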
Example 7: test_get_top_features
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def test_get_top_features(self):
    M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
    M = utils.cast_np_sa_to_nd(M)
    M_train, M_test, labels_train, labels_test = train_test_split(M, labels)
    clf = RandomForestClassifier(random_state=0)
    clf.fit(M_train, labels_train)
    ctrl_feat_importances = clf.feature_importances_
    ctrl_col_names = ['f{}'.format(i) for i in range(15)]  # Python 3: xrange -> range
    ctrl_feat_ranks = np.argsort(ctrl_feat_importances)[::-1][:10]
    ctrl = utils.convert_to_sa(
        list(zip(ctrl_col_names, ctrl_feat_importances)),  # zip is lazy in Python 3
        col_names=('feat_name', 'score'))[ctrl_feat_ranks]
    res = dsp.get_top_features(clf, M, verbose=False)
    self.assertTrue(uft.array_equal(ctrl, res))
    res = dsp.get_top_features(clf, col_names=['f{}'.format(i) for i in range(15)],
                               verbose=False)
    self.assertTrue(uft.array_equal(ctrl, res))
Example 8: classify
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def classify(X, y, cl, name=''):
    """Classification using gene features"""
    from sklearn.metrics import classification_report, accuracy_score
    np.random.seed()
    ind = np.random.permutation(len(X))  # note: unused in the original
    from sklearn.cross_validation import train_test_split
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.4)
    cl.fit(Xtrain, ytrain)
    ypred = cl.predict(Xtest)
    print(classification_report(ytest, ypred))
    #print(accuracy_score(ytest, ypred))
    from sklearn import cross_validation
    yl = pd.Categorical(y).codes  # .labels was removed from pandas; .codes is current
    sc = cross_validation.cross_val_score(cl, X, yl, scoring='roc_auc', cv=5)
    print("AUC: %0.2f (+/- %0.2f)" % (sc.mean(), sc.std() * 2))
    return cl
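The original's pd.Categorical(y).labels is long-removed pandas API; .codes is the current attribute. A quick self-contained check:

import pandas as pd

y = ['tumor', 'normal', 'tumor', 'normal']
print(pd.Categorical(y).codes)  # [1 0 1 0]: integer codes per sorted category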
Example 9: load_data
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def load_data():
    global training_data, testing_data
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    xs = lfw_people.data
    ys = lfw_people.target
    inputs = []
    labels = list(ys)
    for face in xs:
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))
    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)
    training_data = list(zip(x_tr, y_tr))  # Python 3: zip is lazy
    testing_data = list(zip(x_te, y_te))
    print('Dataset made...')  # Python 3 print function
Example 10: load_train_val_test
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def load_train_val_test(self, only_test=False):
    X_train, X_rest, y_train, y_rest = train_test_split(self.X, self.y,
                                                        test_size=0.3,
                                                        stratify=self.y,
                                                        random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_rest, y_rest,
                                                    test_size=0.5,
                                                    stratify=y_rest,
                                                    random_state=42)
    if not only_test:
        print("\nPreparing training set...")
        training = prepare_dataset(X_train, y_train, self.pipeline,
                                   self.y_one_hot)
        print("\nPreparing validation set...")
        validation = prepare_dataset(X_val, y_val, self.pipeline,
                                     self.y_one_hot)
    print("\nPreparing test set...")
    testing = prepare_dataset(X_test, y_test, self.pipeline,
                              self.y_one_hot)
    if only_test:
        return testing
    else:
        return training, validation, testing
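The two chained splits yield a stratified 70/15/15 train/validation/test partition: 30% is held out, then halved. A minimal self-contained sketch of the same pattern (using the modern sklearn.model_selection module):

import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(200).reshape(100, 2)
y = np.repeat([0, 1], 50)
X_tr, X_rest, y_tr, y_rest = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)
X_val, X_te, y_val, y_te = train_test_split(
    X_rest, y_rest, test_size=0.5, stratify=y_rest, random_state=42)
print(len(X_tr), len(X_val), len(X_te))  # 70 15 15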
Example 11: _shuffle_images_for_target
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def _shuffle_images_for_target(self, data, target):
    """
    Takes all the non-paired images for a given person, slices them into training,
    validation, and test sets, and shuffles within each of these sets.
    """
    # train_test_split can only partition into two sets, so we split off the
    # training set first, then split the remainder into validation and test sets.
    (train_data, other_data, train_target, other_target) = train_test_split(
        data, target, train_size=0.7, test_size=0.3, random_state=0)
    self._train["data"].extend(train_data)
    self._train["target"].extend(train_target)
    (validation_data, test_data, validation_target, test_target) = train_test_split(
        other_data, other_target, train_size=0.9, test_size=0.1, random_state=0)
    self._validation["data"].extend(validation_data)
    self._validation["target"].extend(validation_target)
    self._test["data"].extend(test_data)
    self._test["target"].extend(test_target)
Example 12: splitData
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def splitData(self, dataFile, test_size):
    # Load the dataset
    header = ['user_id', 'item_id', 'rating', 'timestamp']
    df = pd.read_csv(dataFile, sep='\t', names=header)
    self.n_users = df.user_id.unique().shape[0]
    self.n_items = df.item_id.unique().shape[0]
    print('Number of users = ' + str(self.n_users) +
          ' | Number of items = ' + str(self.n_items))
    # Split the dataset of user/movie ratings
    self.train_data, self.test_data = cv.train_test_split(
        df, test_size=test_size)
    print('Train/test split succeeded', file=sys.stderr)
    print('len(train) = %s' % np.shape(self.train_data)[0], file=sys.stderr)
    print('len(test) = %s' % np.shape(self.test_data)[0], file=sys.stderr)
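The header matches the MovieLens-100k u.data format (tab-separated user/item/rating/timestamp). A hedged usage sketch, where the class name and file path are illustrative assumptions rather than names from the source:

# Hypothetical call; `Recommender` and the path are illustrative only
rec = Recommender()
rec.splitData('ml-100k/u.data', test_size=0.25)
print(rec.n_users, rec.n_items)  # 943 1682 for MovieLens-100k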
Example 13: _split_data_sets
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def _split_data_sets(details):
    """
    Shuffles and splits our datasets into training and validation sets.
    """
    image_paths = details["image_paths"]
    targets = details["targets"]
    print("\tShuffling data...")  # Python 3 print function
    (image_paths, targets) = shuffle(image_paths, targets, random_state=0)
    print("\tSplitting data 80% training, 20% validation...")
    return train_test_split(image_paths, targets, train_size=0.8, test_size=0.2,
                            random_state=0)
Example 14: split_train_test
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def split_train_test(test_size):
    # choose a dataset with unbalanced class instances
    data = make_multilabel_classification(
        n_samples=300, n_classes=10, allow_unlabeled=False)
    X = StandardScaler().fit_transform(data[0])
    Y = data[1]
    X_trn, X_tst, Y_trn, Y_tst = train_test_split(X, Y, test_size=test_size)
    # Only the first 5 training labels are revealed; the rest are None
    # (unlabeled), as an active-learning Dataset expects.
    trn_ds = Dataset(X_trn, Y_trn[:5].tolist() + [None] * (len(Y_trn) - 5))
    tst_ds = Dataset(X_tst, Y_tst.tolist())
    fully_labeled_trn_ds = Dataset(X_trn, Y_trn)
    return trn_ds, tst_ds, fully_labeled_trn_ds
Example 15: main
# Required import: from sklearn import cross_validation [as alias]
# Or: from sklearn.cross_validation import train_test_split [as alias]
def main():
    # Define X and y
    # Load data
    PATH = "./data/64_64_1/offset_1.3/"
    X = np.load(PATH + "basic_dataset_img.npz")
    y = np.load(PATH + "basic_dataset_pts.npz")
    X = X['arr_0']
    y = y['arr_0'].reshape(-1, 136)
    print("Define X and Y")
    print("=======================================")
    # Split train / test dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    print("Success of getting train / test dataset")
    print("=======================================")
    print("X_train: ", X_train.shape)
    print("y_train: ", y_train.shape)
    print("X_test: ", X_test.shape)
    print("y_test: ", y_test.shape)
    print("=======================================")
    # `model` and the smoothL1 loss are defined elsewhere in the source file.
    model.compile(loss=smoothL1, optimizer=keras.optimizers.Adam(lr=1e-3),
                  metrics=['mape'])
    print(model.summary())
    # Checkpoint the best model by validation loss
    filepath = "./mobilenet_checkpoints/smooth_L1-{epoch:02d}-{val_mean_absolute_percentage_error:.5f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]
    history = model.fit(X_train, y_train, batch_size=64, epochs=10000, shuffle=True,
                        verbose=1, validation_data=(X_test, y_test),
                        callbacks=callbacks_list)
    # Save model
    model.save("./model/face_landmark_dnn.h5")
    print("=======================================")
    print("Save Final Model")
    print("=======================================")