This article collects typical usage examples of the sample function from the Python utils module. If you have been wondering what exactly sample does, how to call it, or what real uses of it look like, the curated code examples here may help.
The sections below show 15 code examples of sample, drawn from open-source projects and ordered by popularity by default.
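These 15 examples come from different projects, so utils.sample is not one fixed API: depending on the project it draws an index from a probability vector, picks a random subset of a sequence, or performs weighted sampling. As a reference point for the RNN examples below (Examples 1, 8, 9 and 12), here is a minimal sketch of a temperature-based sampler; it is a hypothetical stand-in, not the actual utils module of any of these projects.

import numpy as np

def sample(a, temperature=1.0):
    # Hypothetical sketch: draw one index from the probability vector `a`,
    # sharpened (temperature < 1) or flattened (temperature > 1).
    a = np.asarray(a, dtype=np.float64)
    a = np.log(a + 1e-10) / temperature        # rescale in log space
    p = np.exp(a) / np.sum(np.exp(a))          # renormalize to a distribution
    return int(np.argmax(np.random.multinomial(1, p)))

With a helper like this, a low temperature makes generation more conservative and a high temperature more diverse, which matches how the examples expose a temp/diversity knob.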
Example 1: generate_text
def generate_text(session, model, config, starting_text='<eos>',
stop_length=100, stop_tokens=None, temp=1.0):
"""Generate text from the model.
Hint: Create a feed-dictionary and use sess.run() to execute the model. Note
that you will need to use model.initial_state as a key to feed_dict
Hint: Fetch model.final_state and model.predictions[-1]. (You set
model.final_state in add_model() and model.predictions is set in
__init__)
Hint: Store the outputs of running the model in local variables state and
y_pred (used in the pre-implemented parts of this function.)
Args:
session: tf.Session() object
model: Object of type RNNLM_Model
config: A Config() object
starting_text: Initial text passed to model.
Returns:
output: List of word idxs
"""
state = model.initial_state.eval()
# Imagine tokens as a batch size of one, length of len(tokens[0])
tokens = [model.vocab.encode(word) for word in starting_text.split()]
# Use starting_text to compute the initial_state (LIBIN)
for wd in tokens:
feed = {model.input_placeholder: np.array([[wd]]),
model.initial_state: state,
model.dropout_placeholder: 1.0}
state, y_pred = session.run([model.final_state, model.predictions[-1]], feed_dict=feed)
# First word predicted by starting_text
# Add it to tokens and use it as input of next step
next_word_idx = sample(y_pred[0], temperature=temp)
tokens.append(next_word_idx)
for i in xrange(stop_length):
### YOUR CODE HERE
# input_placeholder is of shape : (batch_size, num_steps)
# We have batch_size=1 and num_steps=1 here
feed = {model.input_placeholder: np.array([[tokens[-1]]]),
model.initial_state: state,
model.dropout_placeholder: 1.0}
state, y_pred = session.run([model.final_state, model.predictions[-1]], feed_dict=feed)
### END YOUR CODE
# y_pred shape : (1, len(vocab))
# y_pred[0] is a 1-D array of length len(vocab); each element gives the
# probability of the corresponding word.
# next_word_idx is the index of the word sampled from that distribution
# (sample() draws stochastically with temperature temp, not the argmax).
next_word_idx = sample(y_pred[0], temperature=temp)
tokens.append(next_word_idx)
if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
break
output = [model.vocab.decode(word_idx) for word_idx in tokens]
return output
Example 2: main
def main(*args):
import argparse
parser = argparse.ArgumentParser(
description='Run Recommendations',
formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument('-u', '--user', type=str, choices=USER_FILES,
default='test_user',
metavar='USER',
help='user file, e.g.\n' +
'{{{}}}'.format(','.join(sample(USER_FILES, 3))))
parser.add_argument('-k', '--k', type=int, help='for k-means')
parser.add_argument('-q', '--query', choices=CATEGORIES,
metavar='QUERY',
help='search for restaurants by category e.g.\n'
'{{{}}}'.format(','.join(sample(CATEGORIES, 3))))
parser.add_argument('-p', '--predict', action='store_true',
help='predict ratings for all restaurants')
parser.add_argument('-r', '--restaurants', action='store_true',
help='outputs a list of restaurant names')
args = parser.parse_args()
# Output a list of restaurant names
if args.restaurants:
print('Restaurant names:')
for restaurant in sorted(ALL_RESTAURANTS, key=restaurant_name):
print(repr(restaurant_name(restaurant)))
exit(0)
# Select restaurants using a category query
if args.query:
restaurants = search(args.query, ALL_RESTAURANTS)
else:
restaurants = ALL_RESTAURANTS
# Load a user
assert args.user, 'A --user is required to draw a map'
user = load_user_file('{}.dat'.format(args.user))
# Collect ratings
if args.predict:
print(241, restaurants)
ratings = rate_all(user, restaurants, feature_set())
else:
restaurants = user_reviewed_restaurants(user, restaurants)
names = [restaurant_name(r) for r in restaurants]
ratings = {name: user_rating(user, name) for name in names}
# Draw the visualization
if args.k:
centroids = k_means(restaurants, min(args.k, len(restaurants)))
else:
centroids = [restaurant_location(r) for r in restaurants]
draw_map(centroids, restaurants, ratings)
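In Examples 2 and 3, sample(USER_FILES, 3) and sample(CATEGORIES, 3) merely pick a few items to show in the argparse help text, i.e. the call behaves like random.sample. A minimal stand-in under that assumption (hypothetical, not this project's actual utils):

import random

def sample(seq, k):
    # Assumed behaviour for the help-text usage above: k distinct elements
    # of seq chosen uniformly at random.
    return random.sample(list(seq), k)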
Example 3: main
def main(*args):
import argparse
parser = argparse.ArgumentParser(description="Run Recommendations", formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument(
"-u",
"--user",
type=str,
choices=USER_FILES,
default="test_user",
metavar="USER",
help="user file, e.g.\n" + "{{{}}}".format(",".join(sample(USER_FILES, 3))),
)
parser.add_argument("-k", "--k", type=int, help="for k-means")
parser.add_argument(
"-q",
"--query",
choices=CATEGORIES,
metavar="QUERY",
help="search for restaurants by category e.g.\n" "{{{}}}".format(",".join(sample(CATEGORIES, 3))),
)
parser.add_argument("-p", "--predict", action="store_true", help="predict ratings for all restaurants")
args = parser.parse_args()
# Select restaurants using a category query
if args.query:
results = search(args.query, RESTAURANTS.values())
restaurants = {restaurant_name(r): r for r in results}
else:
restaurants = RESTAURANTS
# Load a user
assert args.user, "A --user is required to draw a map"
user = load_user_file("{}.dat".format(args.user))
# Collect ratings
if args.predict:
ratings = rate_all(user, restaurants, feature_set())
else:
restaurants = user_reviewed_restaurants(user, restaurants)
ratings = {name: user_rating(user, name) for name in restaurants}
# Draw the visualization
restaurant_list = list(restaurants.values())
if args.k:
centroids = k_means(restaurant_list, min(args.k, len(restaurant_list)))
else:
centroids = [restaurant_location(r) for r in restaurant_list]
draw_map(centroids, restaurant_list, ratings)
Example 4: main
def main(FLAGS):
"""
"""
if FLAGS.mode == 'train':
# Process the data
train_data, test_data = process_data(
data_dir=FLAGS.data_dir,
split_ratio=FLAGS.split_ratio,
)
# Sample
sample(
data=train_data,
data_dir=FLAGS.data_dir,
)
# Load components
with open(os.path.join(basedir, FLAGS.data_dir, 'char2index.json'), 'r') as f:
char2index = json.load(f)
# Training
train(
data_dir=FLAGS.data_dir,
char2index=char2index,
train_data=train_data,
test_data=test_data,
num_epochs=FLAGS.num_epochs,
batch_size=FLAGS.batch_size,
num_filters=FLAGS.num_filters,
learning_rate=FLAGS.lr,
decay_rate=FLAGS.decay_rate,
max_grad_norm=FLAGS.max_grad_norm,
dropout_p=FLAGS.dropout_p,
)
elif FLAGS.mode == 'infer':
# Inference
infer(
data_dir=FLAGS.data_dir,
model_name=FLAGS.model_name,
sentence=FLAGS.sentence,
)
else:
raise Exception('Choose --mode train|infer')
Example 5: compute
def compute(self, size):
"""
self.points is a matrix with n rows and d cols.
bi is a matrix with k*log(n) rows and d cols.
dist(i) represents sens(p_i), the sensitivity of point p_i, as in the formula discussed.
"""
e = w_kmeans.Kmeans(self.points, np.expand_dims(self.weights, axis=0), self.k, 10)
bi = e.compute()
dist = utils.get_dist_to_centers(self.points, bi) #find distance of each point to its nearset cluster
if self.weights is not None: # its always not none!!!
dist /= np.sum(dist) #norm
dist *= 2
c = utils.get_centers(self.points, bi)#get centers
c = self.find_cluester_size_weighted(c, W=self.weights)#get weighted size of center's cluster
dist += ((4.0)/(c)) #add to each point the size of its cluster as at the formula
t = np.sum(dist*self.weights)
weights = 1/(dist*size)
weights *= t
# print t
dist *= self.weights
dist /= np.sum(dist)
prob = dist # its actually the sampling probability
points, weights = utils.sample(self.points, prob, size, weights=weights)
return points, weights
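Here utils.sample(self.points, prob, size, weights=weights) appears to draw size rows of self.points according to the sampling probabilities prob and to return those rows together with their coreset weights. A rough sketch under that assumption (the real utils module may differ):

import numpy as np

def sample(points, prob, size, weights=None):
    # Hypothetical sketch: sample `size` row indices with replacement
    # according to `prob`, then return the chosen points and weights.
    idx = np.random.choice(len(points), size=size, replace=True, p=prob)
    new_weights = weights[idx] if weights is not None else np.ones(size)
    return points[idx], new_weights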
Example 6: bio_detector_experiment_prok_euk
def bio_detector_experiment_prok_euk(filename=None,pickle_filename=None):
#use data from prok_euk_ic_gini_experiment; Figure 4 in Gini Paper
if pickle_filename is None:
prok_motifs = bio_motifs
euk_motifs = [motif if len(motif) <= 200 else sample(200,motif,replace=False)
for motif in euk_motifs]
with open("prok_euk_ic_gini_experiment.pkl") as f:
(prok_maxents, prok_uniforms, euk_maxents, euk_uniforms) = cPickle.load(f)
prok_bio_ginis = map(motif_gini, prok_motifs)
euk_bio_ginis = map(motif_gini, euk_motifs)
prok_ps = [percentile(bio_gini,map(motif_gini,spoofs)) for bio_gini,spoofs in zip(prok_bio_ginis,prok_maxents)]
prok_spoofs = [spoofs[0] for spoofs in prok_maxents]
prok_neg_ps = [percentile(motif_gini(spoof),map(motif_gini,spoofs))
for spoof,spoofs in zip(prok_spoofs,prok_maxents)]
euk_ps = [percentile(bio_gini,map(motif_gini,spoofs)) for bio_gini,spoofs in zip(euk_bio_ginis,euk_maxents)]
euk_spoofs = [spoofs[0] for spoofs in euk_maxents]
euk_neg_ps = [percentile(motif_gini(spoof),map(motif_gini,spoofs))
for spoof,spoofs in zip(euk_spoofs,euk_maxents)]
with open("bio_detector_experiment_prok_euk.pkl",'w') as f:
cPickle.dump((prok_ps,euk_ps,prok_neg_ps,euk_neg_ps),f)
else:
with open(pickle_filename) as f:
(prok_ps,euk_ps,prok_neg_ps,euk_neg_ps) = cPickle.load(f)
sns.set_style('white')
#sns.set_palette('gray')
sns.set_palette(sns.cubehelix_palette(3))
roc_curve(prok_ps + euk_ps,prok_neg_ps + euk_neg_ps,color='black')
plt.xlabel("FPR",fontsize='large')
plt.ylabel("TPR",fontsize='large')
maybesave(filename)
Example 7: gini_vs_mi_comparison
def gini_vs_mi_comparison(filename=None):
sys.path.append("/home/pat/jaspar")
from parse_jaspar import euk_motifs
euk_motifs = [motif if len(motif) <= 200 else sample(200,motif,replace=False)
for motif in euk_motifs]
prok_ginis = map(motif_gini,bio_motifs)
prok_mis = map(total_motif_mi,tqdm(bio_motifs))
prok_mipps = map(motif_mi_pp,tqdm(bio_motifs))
eu_ginis = map(motif_gini,jaspar_motifs)
eu_mis = map(total_motif_mi,tqdm(jaspar_motifs))
eu_mipps = map(motif_mi_pp,tqdm(jaspar_motifs))
plt.subplot(1,2,1)
plt.scatter(prok_ginis,prok_mipps)
plt.xlabel("Gini Coefficient")
plt.ylabel("MI (bits / column pair)")
plt.title("Prokaryotic Motifs")
plt.xlim(-.1,.7)
plt.ylim(-0.1,0.7)
plt.subplot(1,2,2)
plt.scatter(eu_ginis,eu_mipps)
plt.xlabel("Gini Coefficient")
plt.xlim(-.1,.7)
plt.ylim(-0.1,0.7)
plt.title("Eukaryotic Motifs")
plt.suptitle("Mutual Information vs Gini Coefficient")
maybesave(filename)
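Examples 6 and 7 truncate long eukaryotic motifs with sample(200, motif, replace=False), and Example 13 below uses the same size-first convention with sample(N, arca_reads). A hedged sketch of that variant (assumed signature, not the author's actual utils):

import random

def sample(n, xs, replace=True):
    # Assumed signature for the size-first calls: draw n items from xs,
    # with or without replacement.
    if replace:
        return [random.choice(xs) for _ in range(n)]
    return random.sample(xs, n)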
Example 8: generate_text
def generate_text(session, model, config, starting_text='<eos>',
stop_length=100, stop_tokens=None, temp=1.0):
"""Generate text from the model.
Hint: Create a feed-dictionary and use sess.run() to execute the model. Note
that you will need to use model.initial_state as a key to feed_dict
Hint: Fetch model.final_state and model.predictions[-1]. (You set
model.final_state in add_model() and model.predictions is set in
__init__)
Hint: Store the outputs of running the model in local variables state and
y_pred (used in the pre-implemented parts of this function.)
Args:
session: tf.Session() object
model: Object of type RNNLM_Model
config: A Config() object
starting_text: Initial text passed to model.
Returns:
output: List of word idxs
"""
state = model.initial_state.eval()
# Imagine tokens as a batch size of one, length of len(tokens[0])
tokens = [model.vocab.encode(word) for word in starting_text.split()]
for i in xrange(stop_length):
### YOUR CODE HERE
raise NotImplementedError
### END YOUR CODE
next_word_idx = sample(y_pred[0], temperature=temp)
tokens.append(next_word_idx)
if stop_tokens and model.vocab.decode(tokens[-1]) in stop_tokens:
break
output = [model.vocab.decode(word_idx) for word_idx in tokens]
return output
Example 9: compose_async
def compose_async(song_key):
model = get_model()
while True:
diversity = random.uniform(0.7, 1.0)
sentence = '#' * MEMORY_LENGTH + 'X:'
sentence = sentence[-MEMORY_LENGTH:]
generated = 'X:'
while True:
x = np.zeros((1, MEMORY_LENGTH, len(model.chars)))
for t, char in enumerate(sentence):
x[0, t, model.char_indices[char]] = 1.
preds = model.predict(x, verbose=0)[0]
next_index = utils.sample(preds, diversity)
next_char = model.indices_char[next_index]
sentence = sentence[-MEMORY_LENGTH + 1:] + next_char
generated += next_char
if generated.endswith('$$$'):
try:
song = Song.objects.get(key=song_key)
song.song = generated.rstrip('$')
song.save()
writer.write(song_key)
except WriterException:
break
else:
return
if len(generated) > MAX_SONG_LENGTH:
break
Example 10: choose_random_class
def choose_random_class(self):
"""
Choose a random class, weighted by its size
:return: the class name
"""
return sample([(class_name, len(data['class'].body) + 1)
for class_name, data in self._inheritance_graph.nodes_iter(data=True)])
Example 11: choose_random_method
def choose_random_method(self):
"""
Choose a random method, weighted by its size
:return: the method name
"""
return sample([(method_name, len(data['method'].body) + 1)
for method_name, data in self._method_call_graph.nodes_iter(True)])
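In Examples 10 and 11, sample receives a list of (name, weight) pairs and returns a single name chosen with probability proportional to its weight. A possible implementation of that weighted choice (an assumption about this project's utils, not a confirmed API):

import random

def sample(weighted_items):
    # Assumed behaviour: pick one item from [(item, weight), ...] with
    # probability proportional to its weight.
    items, weights = zip(*weighted_items)
    r = random.uniform(0, sum(weights))
    upto = 0.0
    for item, weight in zip(items, weights):
        upto += weight
        if upto >= r:
            return item
    return items[-1]  # guard against floating-point rounding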
Example 12: generate_caption
def generate_caption(self, session, img_feature,toSample=False):
dp = 1
img_template = np.zeros([self.config.batch_size, self.config.img_dim])
img_template[0,:] = img_feature
sent_pred = np.ones([self.config.batch_size, 1])*3591 # <SOS>
while sent_pred[0,-1] != 3339 and (sent_pred.shape[1] - 1) < 50:
feed = {self._sent_placeholder: sent_pred,
self._img_placeholder: img_template,
self._targets_placeholder: np.ones([self.config.batch_size,1]), # dummy variable
self._dropout_placeholder: dp}
idx_next_pred = np.arange(1, self.config.batch_size + 1)*(sent_pred.shape[1] + 1) - 1
if toSample:
logits = session.run(self.logits, feed_dict=feed)
next_logits = logits[idx_next_pred,:]
raw_predicted = []
for row_idx in range(next_logits.shape[0]):
idx = sample(next_logits[row_idx,:])
raw_predicted.append(idx)
raw_predicted = np.array(raw_predicted)
else:
raw_predicted = session.run(self._predictions, feed_dict=feed)
raw_predicted = raw_predicted[idx_next_pred]
next_pred = np.reshape(raw_predicted, (self.config.batch_size,1))
sent_pred = np.concatenate([sent_pred, next_pred], 1)
predicted_sentence = ' '.join(self.index2token[idx] for idx in sent_pred[0,1:-1])
return predicted_sentence
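Unlike the language-model examples, this caption generator passes raw logits to sample with no temperature argument, so the helper presumably applies a softmax itself before drawing. A sketch under that assumption (hypothetical, not the project's confirmed utils):

import numpy as np

def sample(logits):
    # Assumed behaviour: softmax the logits (shifted for numerical
    # stability) and draw one index from the resulting distribution.
    logits = np.asarray(logits, dtype=np.float64)
    logits = logits - np.max(logits)
    probs = np.exp(logits) / np.sum(np.exp(logits))
    return int(np.argmax(np.random.multinomial(1, probs)))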
Example 13: get_arca_reads
def get_arca_reads(N=None):
"""Return N downsampled reads from ArcA dataset"""
filename = '/home/pat/chip_seq_inference/data/chip_seq_datasets/ArcA_park_et_al/SRR835423/SRR835423.map'
arca_reads = read_map(filename)
sampled_arca_reads = sample(N, arca_reads) if N else arca_reads
sampled_read_fraction = len(sampled_arca_reads)/float(len(arca_reads))
print "sampled %1.2f%% of %s reads" % (sampled_read_fraction*100, len(arca_reads))
return sampled_arca_reads
Example 14: solve_n_queens_problem
def solve_n_queens_problem(number_of_queens, population_size=10**3, max_iterations=10**4):
assert 0 < number_of_queens < 256
indices = numpy.arange(number_of_queens)
# def swap_2_random_rows(population, all_rows=indices):
# perm, rand_rows = sample(population, 1)[0], sample(all_rows, 2)
# new_perm = perm.copy()
# new_perm[rand_rows[::-1]] = perm[rand_rows[0]], perm[rand_rows[1]]
# return new_perm
swap_2_random_rows = lambda population, all_rows=indices: swap(sample(population, 1)[0], sample(all_rows, 2))
numb_of_parents = 2
chromo_length = number_of_queens/numb_of_parents
slices = tuple(imap(
apply,
repeat(slice),
izip_longest(*imap(xrange, (0, chromo_length), repeat(number_of_queens - 1), repeat(chromo_length))),
))
def merge_2_random_solutions(population):
return permutation_from_inversion( # merge two solutions by merging their inversion sequence ...
numpy.fromiter(
chain.from_iterable(
imap( # get inversion sequence from each donor parent ...
item,
imap(tuple, imap(permutation_inversion, sample(population, numb_of_parents))),
slices
)
),
count=number_of_queens,
dtype=board_element_type
)
)
operators = merge_2_random_solutions, swap_2_random_rows
def genetic_operators(population, sample_size, prob_of_mutation=.3):
return sorted(
imap(apply, imap(operators.__getitem__, random(sample_size) < prob_of_mutation), repeat((population,))),
key=fitness_function,
reverse=True
)
return genetic_algorithm(
sorted(
starmap(
sample,
repeat((numpy.arange(number_of_queens, dtype=board_element_type), number_of_queens), population_size)
),
key=fitness_function,
reverse=True
),
selection,
genetic_operators,
sort_population,
lambda perm: len(perm) - fitness_function(perm),
max_iterations=max_iterations
)
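In this genetic-algorithm example, sample(arr, k) must return a NumPy array of k elements drawn without replacement, since sampling all number_of_queens elements of arange(n) is used to build random permutations for the initial population. A NumPy-flavoured stand-in under that assumption:

import numpy as np

def sample(arr, k):
    # Assumed behaviour for the n-queens GA: k elements of arr chosen
    # uniformly without replacement, returned as an array. With k equal to
    # len(arr) this yields a random permutation.
    idx = np.random.choice(len(arr), size=k, replace=False)
    return np.asarray(arr)[idx]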
Example 15: nn_classify
def nn_classify(self, N, test_lc, train_files):
best_matches = []
best_distances = []
best_files = []
# Read index of each lc file
upto = 0
for filename in train_files:
#if upto % 200 == 0:
# print upto
upto += 1
# Read all the light curve data into an array
lc_data = open(self._testdir + '/' + filename)
lc_class = filename.strip().split('_')[0]
lc = [[], []]
for line in lc_data:
line = line.strip().split(',')
lc[0].append(float(line[0]))
lc[1].append(float(line[1]))
lc_data.close()
normalise(lc)
lc = sample(lc, 400)
lc = distribute(lc)
# Update the nearest neighbour
distance = self._distance_fn(test_lc, lc)
# Find insert point
insert_point = 0
found = False
for insert_point, bd in enumerate(best_distances):
if bd >= distance:
found = True
break
if found or len(best_distances) == 0:
best_distances.insert(insert_point, distance)
best_matches.insert(insert_point, lc_class)
best_files.insert(insert_point, filename)
# Pop from the top of the list if it's too long
if len(best_distances) > N:
best_distances.pop()
best_matches.pop()
best_files.pop()
# Compute nearest neighbor by majority
near_count = {}
for c in best_matches:
if c not in near_count.keys():
near_count[c] = 1
else:
near_count[c] += 1
#print sorted(near_count.items(), key=itemgetter(1))
return [sorted(near_count.items(), key=itemgetter(1))[-1][0], best_files]
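In this final example, sample(lc, 400) reduces a light curve stored as two parallel lists [times, values] to 400 points before the distance computation. One plausible reading, sketched here as an assumption rather than the project's actual utils, is uniform index subsampling of the paired lists:

import random

def sample(lc, n):
    # Assumed behaviour: keep at most n (time, value) pairs, preserving
    # time order, from a light curve given as [times, values].
    times, values = lc
    if len(times) <= n:
        return [list(times), list(values)]
    idx = sorted(random.sample(range(len(times)), n))
    return [[times[i] for i in idx], [values[i] for i in idx]]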