Python utils.shuffle函数代码示例

本文整理汇总了Python中sklearn.utils.shuffle函数的典型用法代码示例。如果您正苦于以下问题：Python shuffle函数的具体用法？Python shuffle怎么用？Python shuffle使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了shuffle函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_feature

def generate_feature(in_file, dump=False, single_only=False, min_count=0):
  """Build a shuffled feature matrix and tag vector from a comma-separated file.

  The first line of ``in_file`` is skipped. On every following line the first
  comma-separated token is taken as the tag and the all-digit tokens after it
  as raw feature values; lines that do not yield exactly 10 such values are
  ignored.

  Args:
    in_file: path of the text file to read.
    dump: when true, write the matrix to ``preprocessing/dumpX.txt`` and the
      tags to ``preprocessing/dumpY.txt``.
    single_only: forwarded unchanged to ``get_feature_array``.
    min_count: forwarded unchanged to ``cut_off``.

  Returns:
    A ``(total_features, training_data, tags)`` tuple; ``tags`` is a NumPy
    array whose order matches the rows of ``training_data``.
  """
  training_data, tags = [], []
  total_features = {}

  # The context manager closes the file even when parsing raises.
  with open(in_file, 'r') as f:
    f.readline()  # skip the first line (presumably a header -- confirm)
    for line in f:
      tokens = line.replace('\n', '').split(',')
      fs = [s for s in tokens[1:] if s.isdigit()]
      # ignore invalid data
      if len(fs) != 10:
        continue
      tags.append(tokens[0])
      features = get_feature_array(fs, single_only)
      update_total_features(total_features, features)
      training_data.append(features)

  training_data = transform_to_matrix(total_features, training_data)
  training_data = cut_off(training_data, min_count)
  # sklearn.utils.shuffle returns shuffled copies and leaves its arguments
  # untouched, so the results have to be rebound for the shuffle to take effect.
  training_data, tags = shuffle(training_data, tags)
  tags = np.array(tags)
  if dump:
    np.savetxt('preprocessing/dumpX.txt', training_data, fmt='%d', delimiter=',')
    np.savetxt('preprocessing/dumpY.txt', tags[np.newaxis].T, fmt='%s', delimiter=',')
  return total_features, training_data, tags

开发者ID:joshua924，项目名称:MachineLearningProject_Team509，代码行数:25，代码来源:feature_generation.py

示例2: _subsample_data

 def _subsample_data(self, X, Y, n=10000):
   """Return the first ``n`` entries of a shuffled copy of the data.

   When ``Y`` is ``None`` only ``X`` is shuffled and a single value is
   returned; otherwise ``X`` and ``Y`` are shuffled together and an
   ``(X, Y)`` pair is returned.
   """
   if Y is None:
     return shuffle(X)[:n]
   shuffled_X, shuffled_Y = shuffle(X, Y)
   return shuffled_X[:n], shuffled_Y[:n]

开发者ID:lazyprogrammer，项目名称:machine_learning_examples，代码行数:7，代码来源:fake_neural_net.py

示例3: main

def main(is_binary=True):
    """Fit a RecursiveNN on a random subset of the PTB data and print its scores.

    The train and test trees are indexed, flattened with ``tree2list`` and,
    when ``is_binary`` is true, filtered on their last label. 5000 shuffled
    training samples and 1000 shuffled test samples are kept.
    """
    def flatten(trees):
        # Index every tree first, then convert the whole collection.
        for tree in trees:
            add_idx_to_tree(tree, 0)
        records = [tree2list(tree, -1, is_binary) for tree in trees]
        if not is_binary:
            return records
        # for filtering binary labels
        return [rec for rec in records if rec[3][-1] >= 0]

    def report(label, value):
        # One space between label and value, like a two-item print statement.
        print(" ".join([label, str(value)]))

    train, test, word2idx = get_ptb_data()
    train = flatten(train)
    test = flatten(test)

    train = shuffle(train)[:5000]
    test = shuffle(test)[:1000]

    V = len(word2idx)
    report("vocab size:", V)
    D = 20
    K = 2 if is_binary else 5

    model = RecursiveNN(V, D, K)
    model.fit(train)
    report("train accuracy:", model.score(train))
    report("test accuracy:", model.score(test))
    report("train f1:", model.f1_score(train))
    report("test f1:", model.f1_score(test))

开发者ID:renjinghai，项目名称:machine_learning_examples，代码行数:35，代码来源:rntn_theano.py

示例4: splitIntoTrainingAndValidation

def splitIntoTrainingAndValidation(A, B):
	"""Split the sources ``A`` and ``B`` into training, validation and test sets.

	Reads the module-level ``sourceSets``, ``freqs`` and ``numFeatures``.
	For every item, ``item[0]`` is used as an index (named ``year`` here)
	into the per-index quota arrays and ``item[1:]`` is stored as the
	feature row. The quota is the element-wise minimum of ``freqs[A]`` and
	``freqs[B]``: a rounded 80% of it goes to training and the remainder to
	validation. Items whose quota is used up go to the test set. Rows from
	``A`` are labelled 0 and rows from ``B`` are labelled 1.

	Returns:
		``(X, y, Xv, yv, Xt, yt, testSet1size, testSet2size, scores)`` where
		``scores`` is ``scores_`` of a ``SelectKBest(f_classif)`` fitted on
		the training and validation data together.
	"""
	data1 = shuffle(sourceSets[A])    # Note this is a random shuffle, that's
	data2 = shuffle(sourceSets[B])    #                                   why we need many iterations
	freqM = np.minimum(freqs[A], freqs[B])
	freq1tr = np.round(freqM * 0.8)        # Randomly selected 80% for the training set,
	freq1va = freqM - freq1tr              # and the remaining 20% for the validation set
	# Source B gets its own copies of the quotas because the loops below decrement them.
	freq2tr = np.copy(freq1tr)
	freq2va = np.copy(freq1va)
	trainingSetSize = int(sum(freq1tr))  # 1/2 size actually
	validatnSetSize = int(sum(freq1va))
	testSet1size = len(data1) - trainingSetSize - validatnSetSize
	testSet2size = len(data2) - trainingSetSize - validatnSetSize
	X  = np.zeros((trainingSetSize*2,         numFeatures))
	Xv = np.zeros((validatnSetSize*2,         numFeatures))
	Xt = np.zeros((testSet1size+testSet2size, numFeatures))
	# Labels: the first half of each set comes from A (0), the second half from B (1).
	y  = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
	yv = np.ravel([([0]*validatnSetSize) + ([1]*validatnSetSize)])
	yt = np.ravel([([0]*testSet1size)    + ([1]*testSet2size)])
	trnIdx = vldIdx = tstIdx = 0
	# Each tuple assignment evaluates its right-hand side first, so the row is
	# written at the old index before the index and the quota are updated.
	for item in data1:
		year = item[0]
		if   freq1tr[year] > 0:   X[trnIdx], trnIdx, freq1tr[year]  =  item[1:],  trnIdx+1,  freq1tr[year]-1
		elif freq1va[year] > 0:  Xv[vldIdx], vldIdx, freq1va[year]  =  item[1:],  vldIdx+1,  freq1va[year]-1
		else:                    Xt[tstIdx], tstIdx                 =  item[1:],  tstIdx+1
	assert trnIdx==trainingSetSize   and vldIdx==validatnSetSize   and tstIdx==testSet1size
	# Second pass continues filling the same arrays right after the rows from A.
	for item in data2:
		year = item[0]
		if   freq2tr[year] > 0:   X[trnIdx], trnIdx, freq2tr[year]  =  item[1:],  trnIdx+1,  freq2tr[year]-1
		elif freq2va[year] > 0:  Xv[vldIdx], vldIdx, freq2va[year]  =  item[1:],  vldIdx+1,  freq2va[year]-1
		else:                    Xt[tstIdx], tstIdx                 =  item[1:],  tstIdx+1
	assert trnIdx==trainingSetSize*2 and vldIdx==validatnSetSize*2 and tstIdx==testSet1size+testSet2size
	X, y = shuffle(X, y)   # Just in case... perhaps no reason to shuffle again here?
	fs = SelectKBest(f_classif, k = numFeatures)   # TODO: try other feature selection methods?
	fs.fit(np.concatenate((X, Xv)), np.concatenate((y, yv)))
	return X, y, Xv, yv, Xt, yt, testSet1size, testSet2size, fs.scores_

开发者ID:lelou6666，项目名称:WavesOfWhat，代码行数:35，代码来源:Validation_and_testing.py

示例5: compute_distances_and_pairs

    def compute_distances_and_pairs(self, pdb_file, nr_contacts=None, nr_noncontacts=None):
        """Compute the distance map, contact masks and the selected pair indices.

        Stores ``Cbdist``, ``contact`` and ``nocontact`` under
        ``self.features['pair']`` and sets ``self.ij_ind_upper`` to the row and
        column indices of the selected contact pairs followed by the selected
        non-contact pairs.

        Args:
            pdb_file: passed to ``pdb.distance_map`` together with ``self.L``.
            nr_contacts: if truthy, keep at most this many contact pairs.
            nr_noncontacts: if truthy, keep at most this many non-contact pairs.
        """
        pair_features = self.features['pair']

        # distances
        pair_features['Cbdist'] = pdb.distance_map(pdb_file, self.L)
        distances = pair_features['Cbdist']

        # mask positions that have too many gaps (rows and columns alike)
        gap_freq = 1 - (self.Ni / self.neff)
        highly_gapped_pos = np.where(gap_freq > self.max_gap_percentage)[0]
        distances[:, highly_gapped_pos] = np.nan
        distances[highly_gapped_pos, :] = np.nan

        # unresolved residues show up as nan in the distance map, hence the
        # suppressed "invalid value" warnings during the comparisons
        with np.errstate(invalid='ignore'):
            pair_features['contact'] = (distances <= self.contact_threshold) * 1
            pair_features['nocontact'] = (distances > self.non_contact_threshold) * 1

        def select_pairs(indicator, limit):
            # Upper-triangle entries at or beyond seq_separation, shuffled with a
            # fixed seed, optionally truncated to the first `limit` pairs.
            found = np.where(np.triu(indicator, k=self.seq_separation))
            rows, cols = tuple(shuffle(found[0], found[1], random_state=0))
            if limit:
                return rows[:limit], cols[:limit]
            return rows, cols

        contact_rows, contact_cols = select_pairs(pair_features['contact'], nr_contacts)
        nocontact_rows, nocontact_cols = select_pairs(pair_features['nocontact'], nr_noncontacts)

        # update indices of i<j for only relevant pairs
        all_rows = np.array(list(contact_rows) + list(nocontact_rows))
        all_cols = np.array(list(contact_cols) + list(nocontact_cols))
        self.ij_ind_upper = all_rows, all_cols

开发者ID:susannvorberg，项目名称:contact_prediction，代码行数:28，代码来源:AlignmentFeatures.py

示例6: get_aa_cross_val

def get_aa_cross_val(L, X, Y, AA, tsize=None, rstate=-1):
    """Split sample indices into train and test positions for amino acid ``AA``.

    Every index ``i`` with ``L[i][-1] == AA`` goes to the test set. When
    ``tsize`` is truthy the test set is topped up to ``int(tsize * len(Y))``
    entries with randomly chosen indices where ``Y == 0`` and the sample is not
    an ``AA`` sample. All remaining indices form the training set.

    Args:
        L: per-sample sequence; the last element of each entry is compared
            with ``AA``.
        X: unused here; kept for interface compatibility.
        Y: NumPy array of labels (must support ``Y.shape`` and ``Y == 0``).
        AA: value that marks a sample as belonging to the test set.
        tsize: optional fraction of ``len(Y)`` used as the target test size.
        rstate: when ``> 0``, both index arrays are additionally shuffled with
            this value as ``random_state``.

    Returns:
        ``(train_position, test_position)`` as integer index arrays.

    Raises:
        ValueError: from ``np.random.choice`` when more filler indices are
            requested than candidates exist.
    """
    # An explicit integer dtype keeps the array usable as an index even when
    # nothing matches AA (an empty list would otherwise become a float array
    # and the mask assignment below would raise IndexError).
    aa_positions = np.array(
        [i for i in range(len(Y)) if L[i][-1] == AA], dtype=int)
    test_position = aa_positions

    if tsize:
        t_len = int(tsize * len(Y))
        is_aa = np.zeros(Y.shape, dtype=bool)
        is_aa[aa_positions] = True
        # positions that are 0 without being the one for AA
        zero_pos = np.where(np.logical_and(Y == 0, ~is_aa))[0]
        clen = t_len - len(aa_positions)
        if clen > 0:
            random_zero_pos = np.random.choice(zero_pos, clen, replace=False)
            test_position = np.concatenate((aa_positions, random_zero_pos))

    test_position = np.random.permutation(test_position)
    mask = np.ones(Y.shape, dtype=bool)
    mask[test_position] = False
    train_position = np.arange(len(mask))[mask]

    if rstate > 0:
        return (shuffle(train_position, random_state=rstate),
                shuffle(test_position, random_state=rstate))
    # in this case, suppose we want only the train and test index
    return train_position, test_position

开发者ID:UdeM-LBIT，项目名称:CoreTracker，代码行数:28，代码来源:classifier.py

示例7: generator3

def generator3(samples, batch_size=32, correction=0.3):
    """Endlessly yield shuffled ``(images, steering_angles)`` batches.

    Each sample contributes three images (center, left, right). The left and
    right images get the center steering angle shifted by ``+correction`` and
    ``-correction`` respectively, so one batch holds up to ``3 * batch_size``
    images.

    Args:
        samples: sequence of records where items 0-2 are the center/left/right
            image paths and item 3 is the steering angle.
        batch_size: number of records (not images) consumed per batch.
        correction: steering offset applied to the side-camera images; this is
            a parameter to tune.

    Yields:
        The pair returned by ``shuffle(X, y)`` for the NumPy arrays of images
        and steering angles.

    Raises:
        ValueError: if ``samples`` is empty (the loop would otherwise spin
            forever without yielding anything).
    """
    num_samples = len(samples)
    if num_samples == 0:
        raise ValueError("samples must not be empty")

    while 1:  # Loop forever so the generator never terminates
        # shuffle() returns a shuffled copy instead of working in place, so the
        # result must be rebound or every epoch sees the samples in the same order.
        samples = shuffle(samples)
        for offset in range(0, num_samples, batch_size):
            batch_samples = samples[offset:offset+batch_size]

            car_images = []
            steering_angles = []
            for batch_sample in batch_samples:
                # Only the file name of the recorded (backslash-separated) path
                # is kept; it is resolved against the module-level `path`.
                img_center, img_left, img_right = [
                    cv2.imread(path + batch_sample[i].split('\\')[-1])
                    for i in range(3)
                ]

                steering_center = float(batch_sample[3])
                steering_left = steering_center + correction
                steering_right = steering_center - correction

                # add images and angles to data set
                car_images.extend([img_center, img_left, img_right])
                steering_angles.extend([steering_center, steering_left, steering_right])

            X_train = np.array(car_images)
            y_train = np.array(steering_angles)
            yield shuffle(X_train, y_train)

开发者ID:chauvinj735，项目名称:Behavior-Cloning，代码行数:28，代码来源:model.py

示例8: import_images

def import_images():
    """Cluster descriptor points of sampled dog and cat images and pickle the centers.

    400 image paths per class are drawn with ``shuffle(..., random_state=0)``.
    The points returned by ``extract_desc_pts`` are collected per class, and a
    200-cluster KMeans is fitted for cats and then for dogs. The cluster
    centers are pickled to two hard-coded files.
    """
    # TODO: implement a timer cutoff for when feature extraction takes too long
    (cat_paths, dog_paths) = get_filenames(TRAINING_FOLDER)

    def collect_points(paths):
        points = []
        for image_fn in shuffle(paths, n_samples = 400, random_state=0):
            odesc_pts = extract_desc_pts(image_fn)
            try:
                points.extend(odesc_pts)
            except TypeError:
                # Non-iterable result (e.g. None): report the image and move on.
                print(image_fn)
                continue
        return points

    d_feats = {'orb': collect_points(dog_paths)}
    c_feats = {'orb': collect_points(cat_paths)}

    cat_k_means = KMeans(n_jobs=-1, n_clusters=200)
    cat_k_means.fit(c_feats['orb'])
    print('dog calc')
    dog_k_means = KMeans(n_jobs=-1, n_clusters=200)
    dog_k_means.fit(d_feats['orb'])
    print('saving....')
    outputs = (
        ('/home/max/CVD/d_o200c200s400.pickle', dog_k_means),
        ('/home/max/CVD/c_o200c200s400.pickle', cat_k_means),
    )
    for target, fitted in outputs:
        with open(target, 'wb') as handle:
            pickle.dump(fitted.cluster_centers_, handle)
    return '\n\n\n DONE   '

开发者ID:Bingjiling，项目名称:Cat-VS-Dog，代码行数:34，代码来源:CVD_feat.py

示例9: generate_training_data

def generate_training_data(image_paths, angles, batch_size=128, validation_flag=False):
    '''
    Endless generator of ``(images, angles)`` batches for model training.

    Every image is loaded and preprocessed; unless ``validation_flag`` is true
    it is also randomly distorted. Samples whose steering angle magnitude is
    greater than 0.33 are emitted a second time, flipped horizontally with the
    angle negated, to give them more weight and mitigate the bias toward low
    and zero turning angles. The paths and angles are reshuffled after every
    yielded batch.
    '''
    image_paths, angles = shuffle(image_paths, angles)
    batch_images, batch_angles = [], []
    while True:
        for idx in range(len(angles)):
            image = cv2.imread(image_paths[idx])
            steer = angles[idx]
            image = preprocess_image(image)
            if not validation_flag:
                image, steer = random_distort(image, steer)
            # First round emits the sample itself; the second round emits the
            # mirrored copy, but only for sufficiently sharp turns.
            for mirrored in (False, True):
                if mirrored:
                    if not abs(steer) > 0.33:
                        break
                    # flip horizontally and invert steer angle
                    image = cv2.flip(image, 1)
                    steer *= -1
                batch_images.append(image)
                batch_angles.append(steer)
                if len(batch_images) == batch_size:
                    yield (np.array(batch_images), np.array(batch_angles))
                    batch_images, batch_angles = [], []
                    image_paths, angles = shuffle(image_paths, angles)

开发者ID:Shtaiven，项目名称:CarND-Behavioral-Cloning-Project，代码行数:30，代码来源:model.py

示例10: splitIntoTrainingValidation

def splitIntoTrainingValidation(A, B):  # TODO: 3rd parameter: the desired value of (validatSet1size + validatSet2size)
    """Split the sources ``A`` and ``B`` into a balanced training set and a validation set.

    Reads the module-level ``sourceSets``, ``freqs``, ``maxTrainSetSz`` and
    ``numFeatures``. ``item[0]`` of every item indexes a quota array and
    ``item[1:]`` is the feature row. Items are taken into training while their
    quota lasts; everything else goes to validation. Rows from ``A`` are
    labelled 0 and rows from ``B`` are labelled 1.

    Returns:
        ``(X, y, Xv, yv, validatSet1size, validatSet2size)``.
    """
    # Note this is a random shuffle, that's why we need many iterations
    data1 = shuffle(sourceSets[A])
    data2 = shuffle(sourceSets[B])

    freq1 = np.minimum(freqs[A], freqs[B])
    if sum(freq1) > maxTrainSetSz:
        # Scale the quotas down so the training half stays near the cap.
        freq1 = np.round(freq1 * (maxTrainSetSz * 1.0 / sum(freq1)))
    trainingSetSize = int(sum(freq1))  # Half size actually.  Approximately <= maxTrainSetSz
    validatSet1size = len(data1) - trainingSetSize
    validatSet2size = len(data2) - trainingSetSize

    X = np.zeros((trainingSetSize*2, numFeatures))
    Xv = np.zeros((validatSet1size+validatSet2size, numFeatures))
    y = np.ravel([([0]*trainingSetSize) + ([1]*trainingSetSize)])
    yv = np.ravel([([0]*validatSet1size) + ([1]*validatSet2size)])

    # The second source needs its own quota copy because the loop decrements it.
    freq2 = np.copy(freq1)
    trnIdx = valIdx = 0
    passes = (
        (data1, freq1, trainingSetSize, validatSet1size),
        (data2, freq2, trainingSetSize*2, validatSet1size+validatSet2size),
    )
    for rows, quota, expectedTrn, expectedVal in passes:
        for item in rows:
            year = item[0]
            if quota[year] > 0:
                quota[year] -= 1
                X[trnIdx] = item[1:]
                trnIdx += 1
            else:
                Xv[valIdx] = item[1:]
                valIdx += 1
        # Running totals after each source must match the preallocated sizes.
        assert trnIdx==expectedTrn and valIdx==expectedVal
    return X, y, Xv, yv, validatSet1size, validatSet2size

开发者ID:boris-k，项目名称:WavesOfWhat，代码行数:35，代码来源:Classify.py

示例11: cluster

def cluster(m, n_colors=32):
    """Rebuild image ``m`` from a codebook of randomly picked pixels.

    ``m`` must have a three-dimensional shape ``(w, h, d)``. The first
    ``n_colors + 1`` pixels of a fixed-seed shuffle form the codebook, and every
    pixel is replaced by the codebook entry that ``pairwise_distances_argmin``
    assigns to it.

    Returns:
        A float array of shape ``(w, h, d)``.
    """
    from sklearn.utils import shuffle
    from sklearn.cluster import KMeans
    from sklearn.metrics import pairwise_distances_argmin

    def recreate_image(codebook, labels, w, h):
        """Recreate the (compressed) image from the code book & labels"""
        d = codebook.shape[1]
        image = np.zeros((w, h, d))
        # Labels are consumed in row-major order, one per pixel.
        image[:] = codebook[labels[:w * h]].reshape(w, h, d)
        return image

    # Flatten the image into a 2D array with one row per pixel.
    w, h, d = tuple(m.shape)
    pixels = np.reshape(m, (w * h, d))

    # The seed is fixed, so one shuffle serves both the sample and the codebook.
    shuffled_pixels = shuffle(pixels, random_state=0)

    # NOTE: the fitted model is not used for the returned image.
    KMeans(n_clusters=n_colors).fit(shuffled_pixels[:1000])

    codebook_random = shuffled_pixels[:n_colors + 1]
    labels_random = pairwise_distances_argmin(codebook_random, pixels, axis=0)

    return recreate_image(codebook_random, labels_random, w, h)

开发者ID:salvador-dali，项目名称:hackerrank_ai，代码行数:26，代码来源:p_7_rubiks_cube_investigation.py

示例12: main

def main():
    """Fit a RecursiveNN on a small random PTB subset and print its scores.

    Train and test trees are indexed, flattened with ``tree2list`` and filtered
    on their last label; 1000 shuffled training samples and 100 shuffled test
    samples are kept. The model is fitted for 3 epochs with a ReLU activation.
    """
    train, test, word2idx = get_ptb_data()

    prepared = []
    for trees in (train, test):
        for tree in trees:
            add_idx_to_tree(tree, 0)
        flattened = [tree2list(tree, -1, True) for tree in trees]
        # for filtering binary labels
        prepared.append([item for item in flattened if item[3][-1] >= 0])
    train, test = prepared

    train = shuffle(train)[:1000]
    test = shuffle(test)[:100]

    V = len(word2idx)
    print(" ".join(("vocab size:", str(V))))
    D = 80
    K = 5

    model = RecursiveNN(V, D, K)
    model.fit(train, epochs=3, activation=T.nnet.relu)

    # Each metric is computed right before its line is printed.
    metrics = (
        ("train accuracy:", model.score, train),
        ("test accuracy:", model.score, test),
        ("train f1:", model.f1_score, train),
        ("test f1:", model.f1_score, test),
    )
    for label, metric, data in metrics:
        print(" ".join((label, str(metric(data)))))

开发者ID:CesarChaMal，项目名称:machine_learning_examples，代码行数:33，代码来源:rntn_theano.py

示例13: process_data

def process_data():
  """Load, mean-center, reshape and shuffle the 'Train' and 'Test' data.

  Sets the module-level ``num_train`` and ``num_test`` to the number of rows
  in each split. The training mean is subtracted from both splits, the inputs
  are reshaped to ``(-1, 1, img_dim, img_dim)`` and the labels are decremented
  by one in place.

  Returns:
    ``(X_train, Y_train, X_test, Y_test)``.
  """
  global num_classes, num_train, num_test

  X_train, Y_train = load_data('Train')
  X_test, Y_test = load_data('Test')
  X_train = X_train.astype(np.float64)
  X_test = X_test.astype(np.float64)
  num_train, num_test = X_train.shape[0], X_test.shape[0]

  # Center both splits with the mean of the training data.
  mean_image = np.mean(X_train, axis=0)
  X_train -= mean_image
  X_test -= mean_image

  def finalize(inputs, labels):
    inputs = inputs.reshape(-1, 1, img_dim, img_dim)
    labels -= 1
    return shuffle(inputs, labels)

  X_train, Y_train = finalize(X_train, Y_train)
  X_test, Y_test = finalize(X_test, Y_test)

  summary = (
    ('Training X shape :- ', X_train),
    ('Training Y shape :- ', Y_train),
    ('Testing X shape :- ', X_test),
    ('Testing Y shape :- ', Y_test),
  )
  for caption, array in summary:
    print(' '.join((caption, str(array.shape))))

  return X_train, Y_train, X_test, Y_test

开发者ID:PankajKataria，项目名称:BanglaReco，代码行数:28，代码来源:solution.py

示例14: frames2batch

 def frames2batch(k = 12,batch_size = 1024, is_calib = False):
     """Convert the positive and negative training frames into saved batches.

     Files from the hard-coded positive and negative directories are shuffled,
     resized to ``k`` x ``k``, vectorized and stored as ``(vector, label)``
     pairs. Every ``batch_size`` pairs are written with ``sp.savez`` to the
     hard-coded batch directory; the remaining pairs are written as a final,
     smaller batch.

     Args:
         k: edge length passed to ``fr.resize_frame``; also part of the
             output file name.
         batch_size: number of pairs per saved batch.
         is_calib: selects the ``_calib-net`` file name suffix instead of
             ``_net``.
     """
     pos = util.get_files(rootdir = 'F:\\train_data\\pos\\')
     neg = util.get_files(rootdir = 'F:\\train_data\\neg\\')
     pos = shuffle(pos)
     neg = shuffle(neg)
     total = pos + neg
     total  = shuffle(total)
     bpath = 'F:\\train_data\\batch\\'
     # One suffix for every batch; the trailing flush used to build a
     # different name ('_-net') than the in-loop save ('_net').
     suffix = ('_' if not is_calib else '_calib-') + 'net'

     def save_batch(items, index):
         # Write one batch under "<bpath><index>_<k><suffix>".
         sp.savez(bpath + str(index) + '_' + str(k) + suffix, sp.array(items))

     batch = []
     c = 0
     for item_path in total:
         frame = fr.get_frame(item_path)
         frame_r = fr.resize_frame(frame,(k,k))
         # Identity test: `== None` on an array compares element-wise and has
         # no single truth value.
         if frame_r is None:
             continue
         vec = fr.frame_to_vect(frame_r)
         # Substring test instead of `find(...) > 0`, which returned label 0
         # for file names that start with 'pos'.
         label = 1 if 'pos' in item_path.split('\\')[-1] else 0
         print(item_path,label)
         batch.append((vec,label))
         if len(batch) == batch_size:
             save_batch(batch, c)
             batch = []
             c += 1
     # Flush the remainder. The old condition also required a full batch, which
     # can never be the case here, so the last partial batch was silently lost.
     if batch:
         save_batch(batch, c)

开发者ID:gogolgrind，项目名称:Cascade-CNN-Face-Detection，代码行数:31，代码来源:datasets.py

示例15: getMNIST

def getMNIST():
    """Load MNIST, downloading it when missing, and return shuffled subsets.

    The archive is looked up in ``../large_files/`` when that directory
    exists, otherwise in the current directory. Training data is cut to 30000
    samples and test data to 1000 samples after shuffling.

    Returns:
        ``(Xtrain, Ytrain, Ytrain_ind, Xtest, Ytest, Ytest_ind)`` where the
        ``X`` arrays are reshaped to ``(N, 1, 28, 28)`` and the ``*_ind``
        values are ``y2indicator`` of the returned label arrays.
    """
    # data shape: train (50000, 784), test (10000, 784)
    # already scaled from 0..1 and converted to float32
    datadir = '../large_files/'
    if not os.path.exists(datadir):
        datadir = ''

    input_file = "%smnist.pkl.gz" % datadir
    if not os.path.exists(input_file):
        url = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        # Download completely before creating the file: a failed request must
        # not leave an empty archive behind that later runs would try to load.
        response = urllib2.urlopen(url)
        try:
            payload = response.read()
        finally:
            response.close()
        with open(input_file, "wb") as out:
            out.write(payload)

    with gzip.open(input_file) as f:
        # NOTE(security): unpickling can execute arbitrary code and this file
        # may have been fetched over plain HTTP; only use a trusted archive.
        train, valid, test = cPickle.load(f)

    Xtrain, Ytrain = train
    Xtest, Ytest = test

    Xtrain, Ytrain = shuffle(Xtrain, Ytrain)
    Xtest, Ytest = shuffle(Xtest, Ytest)

    # try to take a smaller sample
    Xtrain = Xtrain[0:30000]
    Ytrain = Ytrain[0:30000]
    Xtest = Xtest[0:1000]
    Ytest = Ytest[0:1000]

    # Build the indicator matrices from the shuffled, truncated labels so that
    # their rows line up with the returned X and Y arrays.
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    return Xtrain.reshape(len(Xtrain), 1, 28, 28), Ytrain, Ytrain_ind, Xtest.reshape(len(Xtest), 1, 28, 28), Ytest, Ytest_ind

开发者ID:CesarChaMal，项目名称:machine_learning_examples，代码行数:35，代码来源:renet.py

注：本文中的sklearn.utils.shuffle函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。