本文整理汇总了Python中sklearn.cross_validation.ShuffleSplit类的典型用法代码示例。如果您正苦于以下问题：Python cross_validation.ShuffleSplit的具体用法？Python cross_validation.ShuffleSplit怎么用？Python cross_validation.ShuffleSplit使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块sklearn.cross_validation的用法示例。
在下文中一共展示了cross_validation.ShuffleSplit的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: split_data
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def split_data(X, y, splittype='timed', splitfrac=0.1, verbose=False):
    """Split ``X`` and ``y`` into one training part and one test part.

    Parameters
    ----------
    X, y : indexable sequences of equal length
        For ``splittype='rand'`` they are indexed with the index arrays
        produced by ``ShuffleSplit``; for ``'timed'`` only slicing is used.
    splittype : {'timed', 'rand'}
        ``'timed'`` keeps the original order and uses the tail as the test
        set; ``'rand'`` draws a single random split with
        ``cross_validation.ShuffleSplit``.
    splitfrac : float
        Fraction of the blocks assigned to the test set.
    verbose : bool
        When true, print the chosen indices / split position.

    Returns
    -------
    X_train, y_train, X_test, y_test

    Raises
    ------
    ValueError
        If ``splittype`` is neither ``'rand'`` nor ``'timed'``.
    """
    if splittype == 'rand':
        splitter = cross_validation.ShuffleSplit(
            len(X), n_iter=1, test_size=splitfrac)
        # n_iter=1: the splitter yields exactly one (train, test) pair.
        train, test = next(iter(splitter))
        if verbose:
            print("Training blocks: %s" % (train,))
            print("Test blocks: %s" % (test,))
        return X[train], y[train], X[test], y[test]

    if splittype == 'timed':
        split = int((1. - splitfrac) * len(X))
        if verbose:
            print("Split at block  " + str(split))
        return X[:split], y[:split], X[split:], y[split:]

    # Previously this branch blocked on raw_input() and then failed on the
    # unbound return values; fail fast with an explicit error instead.
    raise ValueError(
        "Unknown splittype %r; expected 'rand' or 'timed'" % (splittype,))
示例2: data_split
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def data_split(inputfile):
    """Load a hickle file and return one shuffled train/test split.

    The loaded object is read through the keys ``'mat'``, ``'kmer'`` and
    ``'y'``. The ``'kmer'`` array is reshaped to ``(n, 1024, 4)`` and
    concatenated to ``'mat'`` along axis 1; a singleton axis is then
    inserted at position 1 and the last two axes are swapped.

    Returns
    -------
    list
        ``[X_train, y_train, X_test, y_test]`` with the feature arrays cast
        to ``float32`` and the targets cast to ``int32``.
    """
    loaded = hkl.load(inputfile)
    features = loaded['mat']
    kmer = loaded['kmer']
    targets = loaded['y']

    # Fixed seed so that the single split is reproducible across runs.
    splitter = ShuffleSplit(len(targets), n_iter=1, random_state=1)

    kmer = kmer.reshape((kmer.shape[0], 1024, 4))
    features = np.concatenate((features, kmer), axis=1)
    features = features[:, np.newaxis]
    features = features.transpose((0, 1, 3, 2))

    # n_iter=1: exactly one (train, test) index pair is produced.
    train_idx, test_idx = next(iter(splitter))

    X_train = features[train_idx, :].astype('float32')
    y_train = targets[train_idx].astype('int32')
    X_test = features[test_idx, :].astype('float32')
    y_test = targets[test_idx].astype('int32')
    return [X_train, y_train, X_test, y_test]
#define the network architecture
示例3: grid_search_model
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def grid_search_model(clf_factory, X, Y):
    """Grid-search a pipeline built by ``clf_factory`` and return the best one.

    The search uses 10 shuffled splits with 30% test data, scores with
    ``f1_score`` and explores vectorizer (``vect__*``) and classifier
    (``clf__alpha``) settings. The best estimator is printed and returned.
    """
    splitter = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    search_space = {
        'vect__ngram_range': [(1, 1), (1, 2), (1, 3)],
        'vect__min_df': [1, 2],
        'vect__stop_words': [None, "english"],
        'vect__smooth_idf': [False, True],
        'vect__use_idf': [False, True],
        'vect__sublinear_tf': [False, True],
        'vect__binary': [False, True],
        'clf__alpha': [0, 0.01, 0.05, 0.1, 0.5, 1],
    }

    searcher = GridSearchCV(
        clf_factory(),
        param_grid=search_space,
        cv=splitter,
        score_func=f1_score,
        verbose=10,
    )
    searcher.fit(X, Y)

    best = searcher.best_estimator_
    print(best)
    return best
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:26,代码来源:02_tuning.py
示例4: __grid_search_model
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def __grid_search_model(clf_factory, X, Y):
    """Grid-search a pipeline built by ``clf_factory`` and return the best one.

    The search uses 10 shuffled splits with 30% test data, scores with
    ``f1_score`` and explores vectorizer (``vect__*``) and classifier
    (``clf__alpha``) settings. The best estimator is printed and returned.
    """
    splitter = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    search_space = {
        'vect__ngram_range': [(1, 1), (1, 2), (1, 3)],
        'vect__min_df': [1, 2],
        'vect__smooth_idf': [False, True],
        'vect__use_idf': [False, True],
        'vect__sublinear_tf': [False, True],
        'vect__binary': [False, True],
        'clf__alpha': [0, 0.01, 0.05, 0.1, 0.5, 1],
    }

    searcher = GridSearchCV(
        clf_factory(),
        param_grid=search_space,
        cv=splitter,
        score_func=f1_score,
        verbose=10,
    )
    searcher.fit(X, Y)

    best = searcher.best_estimator_
    print(best)
    return best
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:25,代码来源:04_sent.py
示例5: test_cross_val_generator_with_indices
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def test_cross_val_generator_with_indices():
    """Check that every CV generator yields index arrays, not boolean masks.

    Both the train and the test part of each split must have a non-boolean
    dtype and must be usable to index ``X`` and ``y``.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    # explicitly passing indices value is deprecated
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    ss = cval.ShuffleSplit(2)
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            # The second check used to repeat `train`, leaving `test`
            # unverified.
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            # Indexing must work without raising; results are not inspected.
            X[train], X[test]
            y[train], y[test]
示例6: test_cross_val_generator_with_default_indices
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def test_cross_val_generator_with_default_indices():
    """Check that CV generators built with defaults yield index arrays.

    Both the train and the test part of each split must have a non-boolean
    dtype and must be usable to index ``X`` and ``y``.
    """
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = cval.LeaveOneOut(4)
    lpo = cval.LeavePOut(4, 2)
    kf = cval.KFold(4, 2)
    skf = cval.StratifiedKFold(y, 2)
    lolo = cval.LeaveOneLabelOut(labels)
    lopo = cval.LeavePLabelOut(labels, 2)
    ss = cval.ShuffleSplit(2)
    ps = cval.PredefinedSplit([1, 1, 2, 2])
    for cv in [loo, lpo, kf, skf, lolo, lopo, ss, ps]:
        for train, test in cv:
            assert_not_equal(np.asarray(train).dtype.kind, 'b')
            # The second check used to repeat `train`, leaving `test`
            # unverified.
            assert_not_equal(np.asarray(test).dtype.kind, 'b')
            # Indexing must work without raising; results are not inspected.
            X[train], X[test]
            y[train], y[test]
示例7: data_split
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def data_split(inputfile, reads_count):
    """Load features and read counts and return one shuffled train/test split.

    ``inputfile`` and ``reads_count`` are both loaded with ``hkl.load``.
    The target is ``log(mean(reads_count, axis=1) + 1e-3)``. The features
    are built from the ``'mat'`` and ``'kmer'`` entries: ``'kmer'`` is
    reshaped to ``(n, 1024, 4)``, concatenated to ``'mat'`` along axis 1,
    given a singleton axis at position 1, and its last two axes are swapped.

    Returns
    -------
    list
        ``[X_train, y_train, X_test, y_test]``, all cast to ``float32``.
    """
    loaded = hkl.load(inputfile)
    counts = hkl.load(reads_count)
    features = loaded['mat']
    kmer = loaded['kmer']

    # Small offset keeps the logarithm finite when the mean count is zero.
    targets = np.log(np.mean(np.array(counts), axis=1) + 1e-3)

    # Fixed seed so that the single split is reproducible across runs.
    splitter = ShuffleSplit(len(targets), n_iter=1, random_state=1)

    kmer = kmer.reshape((kmer.shape[0], 1024, 4))
    features = np.concatenate((features, kmer), axis=1)
    features = features[:, np.newaxis]
    features = features.transpose((0, 1, 3, 2))

    # n_iter=1: exactly one (train, test) index pair is produced.
    train_idx, test_idx = next(iter(splitter))

    X_train = features[train_idx, :].astype('float32')
    y_train = targets[train_idx].astype('float32')
    X_test = features[test_idx, :].astype('float32')
    y_test = targets[test_idx].astype('float32')
    print('Data prepration done!')
    return [X_train, y_train, X_test, y_test]
#define the network architecture
示例8: __call__
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def __call__(self, X, y, net):
    """Split ``X`` and ``y`` into training and validation parts.

    Returns ``X_train, X_valid, y_train, y_valid``. When ``self.eval_size``
    is ``None`` everything is used for training and the validation parts
    are empty slices.
    """
    if self.eval_size is None:
        X_train, y_train = X, y
        X_valid, y_valid = X[len(X):], y[len(y):]
        return X_train, X_valid, y_train, y_valid

    if net.regression or not self.stratify:
        # The roles are deliberately swapped: ShuffleSplit is asked for a
        # "test" part of size 1 - eval_size, which becomes the training
        # set, while its "train" part (the remaining eval_size fraction)
        # becomes the validation set.
        complement = 1 - self.eval_size
        splitter = ShuffleSplit(
            y.shape[0], test_size=complement,
            random_state=self.random_state
        )
        valid_indices, train_indices = next(iter(splitter))
    else:
        # Stratified case: take the first fold of a k-fold split whose
        # fold count is derived from eval_size.
        fold_count = int(round(1 / self.eval_size))
        splitter = StratifiedKFold(
            y, n_folds=fold_count, random_state=self.random_state)
        train_indices, valid_indices = next(iter(splitter))

    X_train, y_train = X[train_indices], y[train_indices]
    X_valid, y_valid = X[valid_indices], y[valid_indices]
    return X_train, X_valid, y_train, y_valid
示例9: train_test_split
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def train_test_split(X, y, test_size=0.25, random_state=42, stratify=True):
    """Return a single train/test split of ``X`` and ``y``.

    With ``stratify`` the first fold of a ``StratifiedKFold`` with
    ``round(1 / test_size)`` folds is used; otherwise the first split of a
    ``ShuffleSplit`` with the given ``test_size``.

    Returns
    -------
    X_train, X_test, y_train, y_test
    """
    if stratify:
        n_folds = int(round(1 / test_size))
        sss = StratifiedKFold(y, n_folds=n_folds, random_state=random_state)
    else:
        sss = ShuffleSplit(len(y), test_size=test_size,
                           random_state=random_state)
    # The builtin next() works on Python 2.6+ and Python 3; the iterator's
    # ``.next()`` method exists on Python 2 only.
    train_idx, test_idx = next(iter(sss))
    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]
示例10: test_shuffle_split
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def test_shuffle_split():
    """Check that equivalent ``test_size`` spellings give identical splits.

    A fraction of 0.2 on 10 samples, the integer 2, ``np.int32(2)`` and
    every type in ``six.integer_types`` must all produce the same train and
    test indices for the same ``random_state``.
    """
    ss1 = cval.ShuffleSplit(10, test_size=0.2, random_state=0)
    ss2 = cval.ShuffleSplit(10, test_size=2, random_state=0)
    ss3 = cval.ShuffleSplit(10, test_size=np.int32(2), random_state=0)
    for typ in six.integer_types:
        ss4 = cval.ShuffleSplit(10, test_size=typ(2), random_state=0)
        # Compare inside the loop so every integer type is verified, not
        # only the last one assigned to ss4.
        for t1, t2, t3, t4 in zip(ss1, ss2, ss3, ss4):
            assert_array_equal(t1[0], t2[0])
            assert_array_equal(t2[0], t3[0])
            assert_array_equal(t3[0], t4[0])
            assert_array_equal(t1[1], t2[1])
            assert_array_equal(t2[1], t3[1])
            assert_array_equal(t3[1], t4[1])
示例11: test_shufflesplit_errors
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def test_shufflesplit_errors():
    """Check that ``ShuffleSplit(10, ...)`` rejects invalid size arguments.

    Every keyword combination listed below is expected to raise
    ``ValueError`` at construction time.
    """
    rejected_kwargs = [
        dict(test_size=2.0),
        dict(test_size=1.0),
        dict(test_size=0.1, train_size=0.95),
        dict(test_size=11),
        dict(test_size=10),
        dict(test_size=8, train_size=3),
        dict(train_size=1j),
        dict(test_size=None, train_size=None),
    ]
    for kwargs in rejected_kwargs:
        assert_raises(ValueError, cval.ShuffleSplit, 10, **kwargs)
示例12: test_shufflesplit_reproducible
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def test_shufflesplit_reproducible():
    """Iterating a seeded ``ShuffleSplit`` twice must give the same splits."""
    ss = cval.ShuffleSplit(10, random_state=21)
    # Collect the train indices of two independent passes over the same
    # splitter and compare them.
    first_pass = [train for train, _ in ss]
    second_pass = [train for train, _ in ss]
    assert_array_equal(first_pass, second_pass)
示例13: crossVal_algo
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def crossVal_algo(k, algo, params, X, y, splittype, splitfrac, verbose=False):
    """Cross-validate every hyper-parameter combination and keep the best.

    Parameters
    ----------
    k : int
        Number of splits (``n_iter`` for ``'rand'``, ``n_folds`` for
        ``'timed'``); also the divisor used to average the fold scores.
    algo : str
        One of the keys of ``_CROSSVAL_FACTORIES``.
    params : dict
        Maps a hyper-parameter name to the list of values to try.
    X, y : arrays of blocks
        Indexed with the train/test index arrays; the selected blocks are
        flattened one level before fitting.
    splittype : {'rand', 'timed'}
        ``'rand'`` uses ``ShuffleSplit`` with ``test_size=splitfrac``;
        ``'timed'`` uses ``KFold``.
    splitfrac : float
        Test fraction for the ``'rand'`` split type.
    verbose : bool
        Print each combination and its mean score.

    Returns
    -------
    (best_score, best_params, classifier)
        ``best_params`` is the tuple of values in the iteration order of
        ``params``. If no combination scores above 0 the result is
        ``(0, {}, None)``.

    Raises
    ------
    ValueError
        If ``algo`` or ``splittype`` is not supported.
    """
    if algo not in _CROSSVAL_FACTORIES:
        raise ValueError("Unknown algo %r" % (algo,))
    make_classifier = _CROSSVAL_FACTORIES[algo]

    # NOTE(review): no random_state is passed, so 'rand' splits presumably
    # differ between parameter combinations - confirm this is intended.
    if splittype == 'rand':
        rs2 = cross_validation.ShuffleSplit(
            len(X), n_iter=k, test_size=splitfrac)
    elif splittype == 'timed':
        rs2 = cross_validation.KFold(n=len(X), n_folds=k)
    else:
        raise ValueError(
            "Unknown splittype %r; expected 'rand' or 'timed'" % (splittype,))

    def flatten(blocks):
        # Concatenate the selected blocks into one array of samples.
        return np.array([item for sublist in blocks for item in sublist])

    best_score, best_params, classifier = 0, {}, None
    # Freeze the key order once so names and values stay aligned.
    names = list(params)
    for p in product(*[params[name] for name in names]):
        if verbose:
            print("val= %s" % (p,))
        named = dict(zip(names, p))
        score = 0.0
        for train, test in rs2:
            X_train, y_train = flatten(X[train]), flatten(y[train])
            X_test, y_test = flatten(X[test]), flatten(y[test])
            clf = make_classifier(named)
            clf.fit(X_train, y_train)
            score += clf.score(X_test, y_test)
        score /= k
        if verbose:
            print(score)
        if score > best_score:
            # clf is the estimator fitted on the last fold of this
            # combination.
            best_score, best_params, classifier = score, p, clf
    return best_score, best_params, classifier


# Maps an algorithm name to a factory taking a {param name: value} dict.
# Estimator names are resolved only when a factory is called.
_CROSSVAL_FACTORIES = {
    # Larger C increases model complexity.
    'svc': lambda p: LinearSVC(C=p['C'], penalty="l1", dual=False),
    'kNN': lambda p: KNeighborsClassifier(
        n_neighbors=p['k'], warn_on_equidistant=False, p=p['p']),
    'linearSVM': lambda p: svm.SVC(kernel='linear', C=p['C']),
    'polySVM': lambda p: svm.SVC(
        kernel='poly', degree=p['degree'], C=p['C']),
    # A smaller gamma gives a decision boundary with a smoother curvature.
    'rbfSVM': lambda p: svm.SVC(kernel='rbf', gamma=p['gamma'], C=p['C']),
    'logit': lambda p: LogisticRegression(
        penalty=p['penalty'], dual=False, C=p['C']),
    'tree': lambda p: ExtraTreesClassifier(
        n_estimators=p['ne'], compute_importances=True, random_state=0),
    'randlog': lambda p: RandomizedLogisticRegression(C=p['C']),
}
示例14: train_model
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def train_model(clf, X, Y, name="NB ngram", plot=False):
    """Evaluate ``clf`` on 10 shuffled splits and report the scores.

    For each split the classifier is refitted on the training part and
    scored on both parts; a precision/recall curve and its area are
    computed from ``predict_proba`` on the test part. A tab-separated
    summary (mean/std accuracy, mean/std P/R AUC) is printed.

    Parameters
    ----------
    clf : estimator with ``fit``, ``score`` and ``predict_proba``
    X, Y : arrays indexable with the split index arrays
    name : str
        Label passed to ``plot_pr``.
    plot : bool
        When true, plot the P/R curve of the split with the median AUC.

    Returns
    -------
    (mean train error, mean test error)
    """
    # create it again for plotting
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    train_errors = []
    test_errors = []

    scores = []
    pr_scores = []
    precisions, recalls, thresholds = [], [], []

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]

        clf.fit(X_train, y_train)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        scores.append(test_score)
        proba = clf.predict_proba(X_test)

        # The ROC values are computed but not used further in this function.
        fpr, tpr, roc_thresholds = roc_curve(y_test, proba[:, 1])
        precision, recall, pr_thresholds = precision_recall_curve(
            y_test, proba[:, 1])

        pr_scores.append(auc(recall, precision))
        precisions.append(precision)
        recalls.append(recall)
        thresholds.append(pr_thresholds)

    if plot:
        scores_to_sort = pr_scores
        # Floor division keeps the index an int on Python 3 as well.
        median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]

        # `phase` is not defined in this function; it is expected to be a
        # module-level name.
        plot_pr(pr_scores[median], name, phase, precisions[median],
                recalls[median], label=name)

    summary = (np.mean(scores), np.std(scores),
               np.mean(pr_scores), np.std(pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors)
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:50,代码来源:02_tuning.py
示例15: train_model
# 需要导入模块: from sklearn import cross_validation [as 别名]
# 或者: from sklearn.cross_validation import ShuffleSplit [as 别名]
def train_model(clf, X, Y, name="NB ngram", plot=False):
    """Evaluate ``clf`` on 10 shuffled splits and report the scores.

    For each split the classifier is refitted on the training part and
    scored on both parts; a precision/recall curve and its area are
    computed from ``predict_proba`` on the test part. A tab-separated
    summary (mean/std accuracy, mean/std P/R AUC) is printed.

    Parameters
    ----------
    clf : estimator with ``fit``, ``score`` and ``predict_proba``
        Refitted in place on every split; after the call it holds the fit
        of the last split.
    X, Y : arrays indexable with the split index arrays
    name : str
        Label passed to ``plot_pr`` and ``log_false_positives``.
    plot : bool
        When true, plot the P/R curve of the split with the median AUC and
        log that split's false positives.

    Returns
    -------
    (mean train error, mean test error)
    """
    import copy  # local import: only needed to snapshot per-fold models

    # create it again for plotting
    cv = ShuffleSplit(
        n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)

    train_errors = []
    test_errors = []

    scores = []
    pr_scores = []
    precisions, recalls, thresholds = [], [], []

    clfs = []  # just to later get the median
    test_folds = []  # test indices of each split, aligned with clfs

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]

        clf.fit(X_train, y_train)
        if plot:
            # Appending `clf` itself would store the same object every
            # time, so clfs[median] would always be the last fit. Keep a
            # snapshot of this fold's fitted state instead.
            clfs.append(copy.deepcopy(clf))
            test_folds.append(test)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        scores.append(test_score)
        proba = clf.predict_proba(X_test)

        # The ROC values are computed but not used further in this function.
        fpr, tpr, roc_thresholds = roc_curve(y_test, proba[:, 1])
        precision, recall, pr_thresholds = precision_recall_curve(
            y_test, proba[:, 1])

        pr_scores.append(auc(recall, precision))
        precisions.append(precision)
        recalls.append(recall)
        thresholds.append(pr_thresholds)

    if plot:
        scores_to_sort = pr_scores
        # Floor division keeps the index an int on Python 3 as well.
        median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]

        # `phase` is not defined in this function; it is expected to be a
        # module-level name.
        plot_pr(pr_scores[median], name, phase, precisions[median],
                recalls[median], label=name)

        # Evaluate the median model on its own held-out split rather than
        # on whatever split happened to be processed last.
        median_test = test_folds[median]
        log_false_positives(
            clfs[median], X[median_test], Y[median_test], name)

    summary = (np.mean(scores), np.std(scores),
               np.mean(pr_scores), np.std(pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors)
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:55,代码来源:03_clean.py