This page collects typical usage examples of the Python method preprocessing.write_tf_examples. If you are wondering exactly how preprocessing.write_tf_examples works, or how to call it in practice, the curated examples below should help. You can also explore further usage examples from the preprocessing module, in which this method is defined.
The following presents 13 code examples of the preprocessing.write_tf_examples method, sorted by popularity by default.
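For orientation before the examples: the call sites below suggest that write_tf_examples takes a destination path and an iterable of tf.train.Example protos, serializes them unless serialize=False (in which case already-serialized byte strings are expected), and writes everything into a single compressed TFRecord file; the '.zz' suffix points at ZLIB compression. The following is a minimal sketch under those assumptions, not the actual library source:

import tensorflow as tf

# Assumed compression config; the '.zz' file suffix in the examples
# below suggests ZLIB-compressed TFRecords.
TF_RECORD_CONFIG = tf.python_io.TFRecordOptions(
    tf.python_io.TFRecordCompressionType.ZLIB)

def write_tf_examples(filename, tf_examples, serialize=True):
    # Write protos (serialize=True) or pre-serialized bytes (serialize=False)
    # into one TFRecord file.
    with tf.python_io.TFRecordWriter(filename, options=TF_RECORD_CONFIG) as writer:
        for ex in tf_examples:
            writer.write(ex.SerializeToString() if serialize else ex)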
Example 1: test_rotate_pyfunc

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def test_rotate_pyfunc(self):
    num_records = 20
    raw_data = self.create_random_data(num_records)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))
    with tempfile.NamedTemporaryFile() as f:
        preprocessing.write_tf_examples(f.name, tfexamples)
        self.reset_random()
        run_one = self.extract_data(f.name, random_rotation=False)
        self.reset_random()
        run_two = self.extract_data(f.name, random_rotation=True)
        self.reset_random()
        run_three = self.extract_data(f.name, random_rotation=True)
        self.assert_rotate_data(run_one, run_two, run_three)
Example 2: test_tpu_rotate

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def test_tpu_rotate(self):
    num_records = 100
    raw_data = self.create_random_data(num_records)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))
    with tempfile.NamedTemporaryFile() as f:
        preprocessing.write_tf_examples(f.name, tfexamples)
        self.reset_random()
        run_one = self.extract_tpu_data(f.name, random_rotation=False)
        self.reset_random()
        run_two = self.extract_tpu_data(f.name, random_rotation=True)
        self.reset_random()
        run_three = self.extract_tpu_data(f.name, random_rotation=True)
        self.assert_rotate_data(run_one, run_two, run_three)
Example 3: selfplay

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def selfplay(
        load_file: "The path to the network model files",
        output_dir: "Where to write the games"="data/selfplay",
        holdout_dir: "Where to write the held-out games"="data/holdout",
        output_sgf: "Where to write the sgfs"="sgf/",
        readouts: 'How many simulations to run per move'=100,
        verbose: '>=2 will print debug info, >=3 will print boards'=1,
        resign_threshold: 'absolute value of threshold to resign at'=0.95,
        holdout_pct: 'how many games to hold out for validation'=0.05):
    qmeas.start_time('selfplay')
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)
    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)
    with timer("Loading weights from %s ... " % load_file):
        network = dual_net.DualNetwork(load_file)
    with timer("Playing game"):
        player = selfplay_mcts.play(
            network, readouts, resign_threshold, verbose)
    output_name = '{}-{}'.format(int(time.time() * 1000 * 1000), socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf())
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)
    # Hold out holdout_pct of games (5% by default) for validation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))
    preprocessing.write_tf_examples(fname, tf_examples)
    qmeas.stop_time('selfplay')
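One detail worth noting in this example: games are named with a microsecond timestamp plus the hostname, so concurrent selfplay workers on different machines are very unlikely to collide on file names. A tiny runnable illustration (the printed value is machine-dependent and the hostname here is hypothetical):

import socket
import time

output_name = '{}-{}'.format(int(time.time() * 1000 * 1000), socket.gethostname())
print(output_name)  # e.g. '1712345678901234-selfplay-worker-7'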
Example 4: selfplay_cache_model

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def selfplay_cache_model(
        network: "A preloaded network model",
        output_dir: "Where to write the games"="data/selfplay",
        holdout_dir: "Where to write the held-out games"="data/holdout",
        output_sgf: "Where to write the sgfs"="sgf/",
        readouts: 'How many simulations to run per move'=100,
        verbose: '>=2 will print debug info, >=3 will print boards'=1,
        resign_threshold: 'absolute value of threshold to resign at'=0.95,
        holdout_pct: 'how many games to hold out for validation'=0.05):
    qmeas.start_time('selfplay')
    clean_sgf = os.path.join(output_sgf, 'clean')
    full_sgf = os.path.join(output_sgf, 'full')
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)
    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)
    with timer("Playing game"):
        player = selfplay_mcts.play(
            network, readouts, resign_threshold, verbose)
    output_name = '{}-{}'.format(int(time.time() * 1000 * 1000), socket.gethostname())
    game_data = player.extract_data()
    with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf(use_comments=False))
    with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)), 'w') as f:
        f.write(player.to_sgf())
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data)
    # Hold out holdout_pct of games (5% by default) for validation.
    if random.random() < holdout_pct:
        fname = os.path.join(holdout_dir, "{}.tfrecord.zz".format(output_name))
    else:
        fname = os.path.join(output_dir, "{}.tfrecord.zz".format(output_name))
    preprocessing.write_tf_examples(fname, tf_examples)
    qmeas.stop_time('selfplay')
Example 5: test_serialize_round_trip

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def test_serialize_round_trip(self):
    np.random.seed(1)
    raw_data = self.create_random_data(10)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))
    with tempfile.NamedTemporaryFile() as f:
        preprocessing.write_tf_examples(f.name, tfexamples)
        recovered_data = self.extract_data(f.name)
        self.assertEqualData(raw_data, recovered_data)
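As a standalone complement to this round-trip test, the written file can be read back directly with TensorFlow 1.x's record iterator, assuming ZLIB compression as sketched earlier (the file name here is hypothetical, and the feature keys depend on what make_tf_example actually emits):

import tensorflow as tf

opts = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
for record in tf.python_io.tf_record_iterator('examples.tfrecord.zz', options=opts):
    example = tf.train.Example.FromString(record)   # raw bytes back into a proto
    print(sorted(example.features.feature.keys()))  # whatever keys make_tf_example wrote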
Example 6: test_filter

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def test_filter(self):
    raw_data = self.create_random_data(100)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))
    with tempfile.NamedTemporaryFile() as f:
        preprocessing.write_tf_examples(f.name, tfexamples)
        recovered_data = self.extract_data(f.name, filter_amount=.05)
        # TODO: this will flake out very infrequently; use set_random_seed.
        self.assertLess(len(recovered_data), 50)
Example 7: test_serialize_round_trip_no_parse

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def test_serialize_round_trip_no_parse(self):
    np.random.seed(1)
    raw_data = self.create_random_data(10)
    tfexamples = list(map(preprocessing.make_tf_example, *zip(*raw_data)))
    with tempfile.NamedTemporaryFile() as start_file, \
            tempfile.NamedTemporaryFile() as rewritten_file:
        preprocessing.write_tf_examples(start_file.name, tfexamples)
        # We want to test that the rewritten, shuffled file contains
        # correctly serialized tf.Examples.
        batch_size = 4
        batches = list(preprocessing.shuffle_tf_examples(
            1000, batch_size, [start_file.name]))
        # 2 batches of 4, 1 incomplete batch of 2.
        self.assertEqual(len(batches), 3)
        # Concatenate the list of lists into one list.
        all_batches = list(itertools.chain.from_iterable(batches))
        # Each iteration overwrites the file with the full set of examples,
        # so the final file holds every example exactly once.
        for _ in batches:
            preprocessing.write_tf_examples(
                rewritten_file.name, all_batches, serialize=False)
        original_data = self.extract_data(start_file.name)
        recovered_data = self.extract_data(rewritten_file.name)
    # The data is shuffled, so sort before checking equality.
    def sort_key(nparray_tuple):
        return nparray_tuple[2]
    original_data = sorted(original_data, key=sort_key)
    recovered_data = sorted(recovered_data, key=sort_key)
    self.assertEqualData(original_data, recovered_data)
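This test hinges on a contract that is easy to miss: shuffle_tf_examples apparently yields batches of already-serialized byte strings, so they must be written back with serialize=False; serialize=True would attempt a second SerializeToString() on plain bytes and fail. A minimal self-contained sketch of the two input shapes (an assumed contract inferred from the tests, not library documentation):

import tensorflow as tf

example = tf.train.Example()              # an empty proto suffices for the demo
as_protos = [example]                     # what serialize=True expects
as_bytes = [example.SerializeToString()]  # what serialize=False expects,
                                          # e.g. the batches from shuffle_tf_examples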
Example 8: flush

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def flush(self, path):
    # random.shuffle on a deque is O(n^2); convert to a list for O(n).
    self.examples = list(self.examples)
    random.shuffle(self.examples)
    with timer("Writing examples to " + path):
        preprocessing.write_tf_examples(
            path, [ex[1] for ex in self.examples], serialize=False)
    self.examples.clear()
    self.examples = deque(maxlen=self.max_size)
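The comment inside flush deserves emphasis: random.shuffle permutes elements by index, and indexing a collections.deque is O(n), so shuffling a deque in place is O(n^2); copying to a list restores O(1) indexing and an O(n) shuffle. A self-contained illustration of the buffer pattern (sizes hypothetical):

from collections import deque
import random

buf = deque(maxlen=1000)
buf.extend(range(5000))  # maxlen evicts the oldest items; only the newest 1000 remain
items = list(buf)        # O(n) copy gives O(1) indexing
random.shuffle(items)    # O(n) shuffle instead of O(n^2) on the deque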
Example 9: gather

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def gather(
        input_directory: 'where to look for games'='data/selfplay/',
        output_directory: 'where to put collected games'='data/training_chunks/',
        examples_per_record: 'how many tf.examples to gather in each chunk'=EXAMPLES_PER_RECORD):
    qmeas.start_time('gather')
    _ensure_dir_exists(output_directory)
    models = [model_dir.strip('/')
              for model_dir in sorted(gfile.ListDirectory(input_directory))[-50:]]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(
                os.path.join(input_directory, model, '*.tfrecord.zz'))
            for model in models
        }
    print("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        print("    %s: %s files" % (model_name, len(record_files)))
    meta_file = os.path.join(output_directory, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()
    num_already_processed = len(already_processed)
    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(preprocessing.shuffle_tf_examples(examples_per_record, record_files))):
            output_record = os.path.join(output_directory,
                                         '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)
    print("Processed %s new files" %
          (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
    qmeas.stop_time('gather')
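The meta.txt bookkeeping makes gather restartable: a model is skipped only when every one of its record files already appears in the processed set. A self-contained illustration of the subset test (file names hypothetical):

already_processed = {'000001-model/a.tfrecord.zz', '000001-model/b.tfrecord.zz'}
model_gamedata = {
    '000001-model': ['000001-model/a.tfrecord.zz', '000001-model/b.tfrecord.zz'],
    '000002-model': ['000002-model/a.tfrecord.zz'],
}
for model_name, record_files in sorted(model_gamedata.items()):
    if set(record_files) <= already_processed:  # <= is the set subset test
        continue  # everything for this model was gathered on a previous run
    print('would gather', model_name)  # prints only '000002-model'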
Example 10: selfplay

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def selfplay(model_name, trained_models_dir, selfplay_dir, holdout_dir, sgf_dir,
             params):
    """Perform selfplay with a specific model.

    Args:
        model_name: The name of the model used for selfplay.
        trained_models_dir: The path to the model files.
        selfplay_dir: Where to write the games. Set as 'base_dir/data/selfplay/'.
        holdout_dir: Where to write the holdout data. Set as
            'base_dir/data/holdout/'.
        sgf_dir: Where to write the sgf (Smart Game Format) files. Set as
            'base_dir/sgf/'.
        params: An object of hyperparameters for the model.
    """
    print('Playing a game with model {}'.format(model_name))
    # Set up paths for the model named 'model_name'.
    model_path = os.path.join(trained_models_dir, model_name)
    output_dir = os.path.join(selfplay_dir, model_name)
    holdout_dir = os.path.join(holdout_dir, model_name)
    # clean_sgf holds sgf files without comments; full_sgf holds them with comments.
    clean_sgf = os.path.join(sgf_dir, model_name, 'clean')
    full_sgf = os.path.join(sgf_dir, model_name, 'full')
    _ensure_dir_exists(output_dir)
    _ensure_dir_exists(holdout_dir)
    _ensure_dir_exists(clean_sgf)
    _ensure_dir_exists(full_sgf)
    with utils.logged_timer('Loading weights from {} ... '.format(model_path)):
        network = dualnet.DualNetRunner(model_path, params)
    with utils.logged_timer('Playing game'):
        player = selfplay_mcts.play(
            params.board_size, network, params.selfplay_readouts,
            params.selfplay_resign_threshold, params.simultaneous_leaves,
            params.selfplay_verbose)
    output_name = '{}-{}'.format(int(time.time()), socket.gethostname())

    def _write_sgf_data(dir_sgf, use_comments):
        with tf.gfile.GFile(
                os.path.join(dir_sgf, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(player.to_sgf(use_comments=use_comments))

    _write_sgf_data(clean_sgf, use_comments=False)
    _write_sgf_data(full_sgf, use_comments=True)
    game_data = player.extract_data()
    tf_examples = preprocessing.make_dataset_from_selfplay(game_data, params)
    # Hold out params.holdout_pct of games (5% by default) for validation.
    if random.random() < params.holdout_pct:
        fname = os.path.join(
            holdout_dir, ('{}' + _TF_RECORD_SUFFIX).format(output_name))
    else:
        fname = os.path.join(
            output_dir, ('{}' + _TF_RECORD_SUFFIX).format(output_name))
    preprocessing.write_tf_examples(fname, tf_examples)
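Because the holdout decision is made once per game rather than per position, random.random() < params.holdout_pct routes roughly holdout_pct of whole games, with all of their positions, into the holdout set. A quick runnable check of that proportion:

import random

random.seed(0)  # seeded only so the demo is reproducible
trials = 10000
held_out = sum(random.random() < 0.05 for _ in range(trials))
print(held_out / trials)  # close to 0.05, i.e. about 5% of games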
Example 11: gather

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def gather(selfplay_dir, training_chunk_dir, params):
    """Gather selfplay data into large training chunks.

    Args:
        selfplay_dir: Where to look for games. Set as 'base_dir/data/selfplay/'.
        training_chunk_dir: Where to put collected games. Set as
            'base_dir/data/training_chunks/'.
        params: An object of hyperparameters for the model.
    """
    # Check the selfplay data from the most recent params.gather_generation models.
    _ensure_dir_exists(training_chunk_dir)
    sorted_model_dirs = sorted(tf.gfile.ListDirectory(selfplay_dir))
    models = [model_dir.strip('/')
              for model_dir in sorted_model_dirs[-params.gather_generation:]]
    with utils.logged_timer('Finding existing tfrecords...'):
        model_gamedata = {
            model: tf.gfile.Glob(
                os.path.join(selfplay_dir, model, '*' + _TF_RECORD_SUFFIX))
            for model in models
        }
    print('Found {} models'.format(len(models)))
    for model_name, record_files in sorted(model_gamedata.items()):
        print('    {}: {} files'.format(model_name, len(record_files)))
    meta_file = os.path.join(training_chunk_dir, 'meta.txt')
    try:
        with tf.gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()
    num_already_processed = len(already_processed)
    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        print('Gathering files from {}:'.format(model_name))
        tf_examples = preprocessing.shuffle_tf_examples(
            params.shuffle_buffer_size, params.examples_per_chunk, record_files)
        # tqdm could wrap this loop to show a smart progress meter.
        for i, example_batch in enumerate(tf_examples):
            output_record = os.path.join(
                training_chunk_dir,
                ('{}-{}' + _TF_RECORD_SUFFIX).format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)
    print('Processed {} new files'.format(
        len(already_processed) - num_already_processed))
    with tf.gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))
Example 12: selfplay

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def selfplay():
    _, model_name = get_latest_model()
    try:
        games = gfile.Glob(os.path.join(PATHS.SELFPLAY_DIR, model_name, '*.zz'))
        if len(games) > GLOBAL_PARAMETER_STORE.MAX_GAMES_PER_GENERATION:
            logger.info("{} has enough games ({})".format(model_name, len(games)))
            time.sleep(600)
            sys.exit(1)
    except:
        pass
    for game_idx in range(GLOBAL_PARAMETER_STORE.NUM_SELFPLAY_GAMES):
        logger.info('================================================')
        logger.info("Playing game {} with model {}".format(game_idx, model_name))
        logger.info('================================================')
        model_save_path = os.path.join(PATHS.MODELS_DIR, model_name)
        game_output_dir = os.path.join(PATHS.SELFPLAY_DIR, model_name)
        game_holdout_dir = os.path.join(PATHS.HOLDOUT_DIR, model_name)
        sgf_dir = os.path.join(PATHS.SGF_DIR, model_name)
        clean_sgf = os.path.join(sgf_dir, 'clean')
        full_sgf = os.path.join(sgf_dir, 'full')
        os.makedirs(clean_sgf, exist_ok=True)
        os.makedirs(full_sgf, exist_ok=True)
        os.makedirs(game_output_dir, exist_ok=True)
        os.makedirs(game_holdout_dir, exist_ok=True)
        with timer("Loading weights from %s ... " % model_save_path):
            network = PolicyValueNetwork(model_save_path)
        with timer("Playing game"):
            agent = alphagozero_agent.play_against_self(
                network, GLOBAL_PARAMETER_STORE.SELFPLAY_READOUTS)
        output_name = '{}-{}'.format(int(time.time()), socket.gethostname())
        game_play = agent.extract_data()
        with gfile.GFile(os.path.join(clean_sgf, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(agent.to_sgf(use_comments=False))
        with gfile.GFile(os.path.join(full_sgf, '{}.sgf'.format(output_name)), 'w') as f:
            f.write(agent.to_sgf())
        tf_examples = preprocessing.create_dataset_from_selfplay(game_play)
        # We reserve 5% of the games played for validation.
        holdout = random.random() < GLOBAL_PARAMETER_STORE.HOLDOUT
        if holdout:
            to_save_dir = game_holdout_dir
        else:
            to_save_dir = game_output_dir
        tf_record_path = os.path.join(to_save_dir, "{}.tfrecord.zz".format(output_name))
        preprocessing.write_tf_examples(tf_record_path, tf_examples)
Example 13: aggregate

# Required module: import preprocessing [as alias]
# Alternatively: from preprocessing import write_tf_examples [as alias]
def aggregate():
    logger.info("Gathering game results")
    os.makedirs(PATHS.TRAINING_CHUNK_DIR, exist_ok=True)
    os.makedirs(PATHS.SELFPLAY_DIR, exist_ok=True)
    models = [model_dir.strip('/')
              for model_dir in sorted(gfile.ListDirectory(PATHS.SELFPLAY_DIR))[-50:]]
    with timer("Finding existing tfrecords..."):
        model_gamedata = {
            model: gfile.Glob(
                os.path.join(PATHS.SELFPLAY_DIR, model, '*.zz'))
            for model in models
        }
    logger.info("Found %d models" % len(models))
    for model_name, record_files in sorted(model_gamedata.items()):
        logger.info("    %s: %s files" % (model_name, len(record_files)))
    meta_file = os.path.join(PATHS.TRAINING_CHUNK_DIR, 'meta.txt')
    try:
        with gfile.GFile(meta_file, 'r') as f:
            already_processed = set(f.read().split())
    except tf.errors.NotFoundError:
        already_processed = set()
    num_already_processed = len(already_processed)
    for model_name, record_files in sorted(model_gamedata.items()):
        if set(record_files) <= already_processed:
            continue
        logger.info("Gathering files for %s:" % model_name)
        for i, example_batch in enumerate(
                tqdm(preprocessing.shuffle_tf_examples(
                    GLOBAL_PARAMETER_STORE.EXAMPLES_PER_RECORD, record_files))):
            output_record = os.path.join(PATHS.TRAINING_CHUNK_DIR,
                                         '{}-{}.tfrecord.zz'.format(model_name, str(i)))
            preprocessing.write_tf_examples(
                output_record, example_batch, serialize=False)
        already_processed.update(record_files)
    logger.info("Processed %s new files" %
                (len(already_processed) - num_already_processed))
    with gfile.GFile(meta_file, 'w') as f:
        f.write('\n'.join(sorted(already_processed)))