This article collects typical usage examples of the tensorflow.compat.v1.gfile method in Python. If you are unsure what v1.gfile does, how to call it, or what working code that uses it looks like, the curated examples below may help. You can also explore further usage examples from the containing module, tensorflow.compat.v1.
The following shows 15 code examples of v1.gfile, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
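A quick orientation before the examples: tf.compat.v1.gfile mirrors Python's built-in file API but also works with the filesystems TensorFlow supports (for example gs:// paths). The sketch below is not taken from any of the 15 samples; it only illustrates, with placeholder paths, the calls they use most often (Open/GFile, Exists, MakeDirs, ListDirectory).

import json
import tensorflow.compat.v1 as tf

out_dir = '/tmp/gfile_demo'        # placeholder directory
path = out_dir + '/config.json'    # placeholder file

# Create the directory if needed and write a small JSON file through gfile.
if not tf.gfile.Exists(out_dir):
  tf.gfile.MakeDirs(out_dir)
with tf.gfile.Open(path, 'w') as f:
  f.write(json.dumps({'conv1': 32}))

# Read it back; gfile.Open and gfile.GFile are interchangeable here.
with tf.gfile.GFile(path, 'r') as f:
  config = json.loads(f.read())

# List directory contents, as the replay-buffer example below does.
print(tf.gfile.ListDirectory(out_dir))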
Example 1: decorator_from_parameterization_file
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def decorator_from_parameterization_file(
    filename, fallback_rule=FallbackRule.pass_through, **kwargs):
  """Create a ConfigurableOps from a parameterization file.

  Loads a json parameterization file from disk
  (as saved by tools.structure_exporter) and creates a ConfigurableOps from
  it.

  Args:
    filename: Path to a parameterization file in json format.
    fallback_rule: A `FallbackRule` enum which controls fallback behavior
      (see __init__ for more detail).
    **kwargs: Miscellaneous args for ConfigurableOps.

  Returns:
    A ConfigurableOps instance with the parameterization from `filename`.
  """
  with tf.gfile.Open(filename, 'r') as f:
    parameterization = json.loads(f.read())
  return ConfigurableOps(
      parameterization=parameterization, fallback_rule=fallback_rule,
      **kwargs)
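A hypothetical call site for Example 1 might look like the sketch below. The file name is a placeholder, and the sketch assumes `decorator_from_parameterization_file`, `FallbackRule`, and `ConfigurableOps` are importable from the module this snippet was lifted from; adjust to your actual project layout.

# Hypothetical usage; 'parameterization.json' is a placeholder for a file
# produced by tools.structure_exporter in the originating project.
decorated_ops = decorator_from_parameterization_file(
    'parameterization.json',
    fallback_rule=FallbackRule.pass_through)
# decorated_ops can then be used in place of the regular op constructors so
# that layer sizes come from the loaded parameterization.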
Example 2: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):

  def load_dataset_file(dataset_file):
    with gfile.Open(dataset_file) as df:
      dataset_json = json.load(df)
      data = dataset_json['data']
    return data

  def load_preds_file(prediction_file):
    with gfile.Open(prediction_file) as pf:
      preds = json.load(pf)
    return preds

  dataset = load_dataset_file(FLAGS.watermark_file)
  preds = load_preds_file(FLAGS.watermark_output_file)

  logging.info('Watermark Label Accuracy =')
  logging.info(
      json.dumps(evaluate_dataset_preds(dataset, preds, ans_key='answers')))
  logging.info('Victim Label Accuracy =')
  logging.info(
      json.dumps(
          evaluate_dataset_preds(dataset, preds, ans_key='original_answers')))
Example 3: read_character_based_corpus
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def read_character_based_corpus(filename, encoding='utf-8'):
  with codecs.getreader(encoding)(tf.gfile.GFile(filename, mode='rb')) as f:
    return Corpus([line.rstrip('\n') for line in f])
Example 4: read_word_based_corpus
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def read_word_based_corpus(filename, encoding='utf-8'):
  with codecs.getreader(encoding)(tf.gfile.GFile(filename, mode='rb')) as f:
    return Corpus([line.split() for line in f])
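Examples 3 and 4 show the same pattern: open the file in binary mode with tf.gfile.GFile and wrap the handle with codecs.getreader, so decoding is explicit rather than left to locale defaults. A standalone sketch of that pattern, with the project-specific Corpus wrapper dropped and a placeholder path, might look like this:

import codecs
import tensorflow.compat.v1 as tf

def read_lines(filename, encoding='utf-8'):
  """Reads a text file through gfile, decoding with the given codec."""
  with codecs.getreader(encoding)(tf.gfile.GFile(filename, mode='rb')) as f:
    return [line.rstrip('\n') for line in f]

lines = read_lines('/tmp/corpus.txt')  # placeholder path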
Example 5: _load_replay_buffers
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def _load_replay_buffers(self, num_buffers=None):
  """Loads multiple checkpoints into a list of replay buffers."""
  if not self._loaded_buffers:  # pytype: disable=attribute-error
    ckpts = gfile.ListDirectory(self._data_dir)  # pytype: disable=attribute-error
    # Assumes that the checkpoints are saved in a format CKPT_NAME.{SUFFIX}.gz
    ckpt_counters = collections.Counter(
        [name.split('.')[-2] for name in ckpts])
    # Should contain the files for add_count, action, observation, reward,
    # terminal and invalid_range
    ckpt_suffixes = [x for x in ckpt_counters if ckpt_counters[x] in [6, 7]]
    if num_buffers is not None:
      ckpt_suffixes = np.random.choice(
          ckpt_suffixes, num_buffers, replace=False)
    self._replay_buffers = []
    # Load the replay buffers in parallel
    with futures.ThreadPoolExecutor(
        max_workers=num_buffers) as thread_pool_executor:
      replay_futures = [thread_pool_executor.submit(
          self._load_buffer, suffix) for suffix in ckpt_suffixes]
    for f in replay_futures:
      replay_buffer = f.result()
      if replay_buffer is not None:
        self._replay_buffers.append(replay_buffer)
        self.add_count = max(replay_buffer.add_count, self.add_count)
    self._num_replay_buffers = len(self._replay_buffers)
    if self._num_replay_buffers:
      self._loaded_buffers = True
Example 6: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  output_data = []
  dataset_paths = FLAGS.dataset_paths.split(",")

  for dp in dataset_paths:
    with gfile.Open(dp, "r") as f:
      base_dataset = f.read().strip().split("\n")

    base_dataset_header = base_dataset[0]
    base_dataset = base_dataset[1:]

    indices_base_dataset = [
        base_dataset_header.split("\t").index(x)
        for x in relevant_headers[FLAGS.task_name]
    ]

    for point in base_dataset:
      input_shards = [
          point.split("\t")[index] for index in indices_base_dataset
      ]
      output_data.append(("%d\t" % len(output_data)) + "\t".join(input_shards))

  logging.info("Final dataset size = %d", len(output_data))

  final_header = "index\t" + "\t".join(relevant_headers[FLAGS.task_name])
  output_data = [final_header] + output_data

  with gfile.Open(FLAGS.output_path, "w") as f:
    f.write("\n".join(output_data) + "\n")
Example 7: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  with gfile.Open(FLAGS.input_path, "r") as f:
    sents_data = f.read().strip().split("\n")[1:]

  classes = [0 for _ in range(num_labels[FLAGS.task_name])]
  entropies = []

  # Assume that the last columns carry the probability information
  for x in tqdm(sents_data):
    probs = (x.split("\t"))[-num_labels[FLAGS.task_name]:]
    probs = [float(x1) for x1 in probs]
    entropies.append(stats.entropy(probs))
    classes[np.argmax(probs)] += 1

  class_distro = []
  for i, cls1 in enumerate(classes):
    class_distro.append(float(cls1) / len(sents_data))
    logging.info("Class %d = %.6f (%d / %d)", i,
                 float(cls1) / len(sents_data), cls1, len(sents_data))

  class_entropy = stats.entropy(class_distro)
  logging.info("Class distribution self-entropy = %.8f", class_entropy)
  logging.info("Average per-instance self-entropy = %.8f", np.mean(entropies))
  logging.info("Max per-instance self-entropy = %.8f", np.max(entropies))
  logging.info("Min per-instance self-entropy = %.8f", np.min(entropies))
  logging.info("Std per-instance self-entropy = %.8f", np.std(entropies))
  return
Example 8: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  random.seed(FLAGS.random_seed)

  with gfile.Open(FLAGS.input_path, "r") as f:
    sents_data = f.read().strip().split("\n")

  header = sents_data[0]
  sents_data = sents_data[1:]

  vocab = build_vocab(sents_data)
  subset_sents_data = build_subset(sents_data)

  output_data = []

  for sent in subset_sents_data:
    output_data.append(sent)
    data_point_parts = sent.split("\t")
    original_sent = data_point_parts[0].split()

    if FLAGS.keep_only_original:
      continue

    # For each perturbation, construct a new sentence by replacing a randomly
    # chosen word with a random word from the vocabulary.
    for _ in range(FLAGS.num_pertubations):
      pertubed = [x for x in original_sent]
      pertubed[random.randint(0, len(original_sent) - 1)] = random.choice(vocab)
      output_data.append(" ".join(pertubed) + " \t" +
                         "\t".join(data_point_parts[1:]))

  output_data = [header] + output_data

  with gfile.Open(FLAGS.output_path, "w") as f:
    f.write("\n".join(output_data) + "\n")

  return
Example 9: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  with gfile.Open(FLAGS.data_file) as f:
    dataset_json = json.load(f)
    dataset = dataset_json['data']
  with gfile.Open(FLAGS.pred_file) as f:
    preds = json.load(f)

  qid_to_has_ans = make_qid_to_has_ans(dataset)  # maps qid to True/False
  has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
  no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
  exact_raw, f1_raw = get_raw_scores(dataset, preds)

  na_probs = {k: 0.0 for k in preds}
  exact_thresh = apply_no_ans_threshold(exact_raw, na_probs, qid_to_has_ans,
                                        1.0)
  f1_thresh = apply_no_ans_threshold(f1_raw, na_probs, qid_to_has_ans, 1.0)
  out_eval = make_eval_dict(exact_thresh, f1_thresh)

  if has_ans_qids:
    has_ans_eval = make_eval_dict(
        exact_thresh, f1_thresh, qid_list=has_ans_qids)
    merge_eval(out_eval, has_ans_eval, 'HasAns')
  if no_ans_qids:
    no_ans_eval = make_eval_dict(exact_thresh, f1_thresh, qid_list=no_ans_qids)
    merge_eval(out_eval, no_ans_eval, 'NoAns')

  if FLAGS.out_file:
    with gfile.Open(FLAGS.out_file, 'w') as f:
      json.dump(out_eval, f)
  else:
    print(json.dumps(out_eval, indent=2))
Example 10: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):

  def load_dataset_file(dataset_file):
    with gfile.Open(dataset_file) as df:
      dataset_json = json.load(df)
      data = dataset_json['data']
    return data

  def load_preds_file(prediction_file):
    with gfile.Open(prediction_file) as pf:
      preds = json.load(pf)
    return preds

  if FLAGS.dataset_file and FLAGS.dataset_file2:
    dataset1 = load_dataset_file(FLAGS.dataset_file)
    dataset2 = load_dataset_file(FLAGS.dataset_file2)
    print(json.dumps(evaluate_dataset_dataset(dataset1, dataset2)))
  elif FLAGS.prediction_file and FLAGS.prediction_file2:
    preds1 = load_preds_file(FLAGS.prediction_file)
    preds2 = load_preds_file(FLAGS.prediction_file2)
    print(json.dumps(evaluate_preds_preds(preds1, preds2)))
  else:
    dataset = load_dataset_file(FLAGS.dataset_file)
    preds = load_preds_file(FLAGS.prediction_file)
    print(json.dumps(evaluate_dataset_preds(dataset, preds)))
Example 11: read_data_sets
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def read_data_sets(
    train_dir,
    fake_data=False,  # pylint:disable=unused-argument
    one_hot=False,
    dtype=np.float32,
    reshape=True,
    validation_size=5000,
    seed=None,
):
  """Read multiple datasets."""
  # pylint:disable=invalid-name
  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

  local_file = os.path.join(train_dir, TRAIN_IMAGES)
  with gfile.Open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = os.path.join(train_dir, TRAIN_LABELS)
  with gfile.Open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = os.path.join(train_dir, TEST_IMAGES)
  with gfile.Open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = os.path.join(train_dir, TEST_LABELS)
  with gfile.Open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'.format(
            len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  options = dict(dtype=dtype, reshape=reshape, seed=seed)

  train = DataSet(train_images, train_labels, **options)
  validation = DataSet(validation_images, validation_labels, **options)
  test = DataSet(test_images, test_labels, **options)

  return train, validation, test
Example 12: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  task_name = FLAGS.task_name.lower()

  with gfile.Open(FLAGS.sents_path, "r") as f:
    sents_data = f.read().strip().split("\n")

  header = sents_data[0] + "".join(
      ["\tlabel%d_prob" % i for i in range(num_labels[task_name])])
  sents_data = sents_data[1:]

  if FLAGS.probs_path:
    with gfile.Open(FLAGS.probs_path, "r") as f:
      probs_data = f.read().strip().split("\n")
  else:
    probs_data = None

  if FLAGS.split_type == "train":
    assert len(sents_data) == len(probs_data)
    output_data = [
        x.strip() + "\t" + y.strip() for x, y in zip(sents_data, probs_data)
    ]
  elif FLAGS.split_type == "train_argmax":
    assert len(sents_data) == len(probs_data)
    # Round the probability vectors before adding them to the file
    output_data = []
    for x, y in zip(sents_data, probs_data):
      # Convert tsv probability vector to numpy style array
      prob_vector = np.array([float(yy) for yy in y.split("\t")])
      # Initialize a vector with zeros
      argmax_prob_vector = np.zeros_like(prob_vector)
      # Keep only the argmax prediction
      argmax_prob_vector[np.argmax(prob_vector)] = 1.0
      argmax_prob_str = "\t".join([str(yy) for yy in argmax_prob_vector])
      output_data.append(x.strip() + "\t" + argmax_prob_str.strip())
  elif FLAGS.split_type == "dev":
    if task_name == "sst-2":
      output_data = [
          x.strip() + "\t1\t0" if x.split("\t")[1] == "0" else x.strip() +
          "\t0\t1" for x in sents_data
      ]
    elif task_name == "mnli":
      output_data = [
          x.strip() + mnli_map[x.split("\t")[-1]] for x in sents_data
      ]

  output_data = [header] + output_data

  with gfile.Open(FLAGS.output_path, "w") as f:
    f.write("\n".join(output_data) + "\n")

  return
Example 13: build_vocab
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def build_vocab(sents_data,
                task_name,
                vocab_mode="downstream_vocab",
                vocab_path=None):
  """Find all words in the input corpus to build a vocabulary."""
  if vocab_mode == "bert_vocab":
    # Use a custom vocab to carry out filtering (such as BERT's word piece)
    with gfile.Open(vocab_path, "r") as f:
      vocab = f.read().strip().split("\n")
    # Filter out special tokens
    vocab = [x for x in vocab if x[0] != "[" and x[-1] != "]"]
    probs = [1.0 / len(vocab) for _ in vocab]

  elif vocab_mode == "full_corpus":
    # Use all words in a corpus of text to find out the vocabulary
    vocab = collections.Counter("\n".join(sents_data).split())
    vocab = [(k, v) for k, v in vocab.items()]
    vocab.sort(key=lambda x: x[1], reverse=True)
    vocab_total = sum([x[1] for x in vocab])
    probs = [float(x[1]) / vocab_total for x in vocab]
    vocab = [x[0] for x in vocab]

  elif "full_corpus_top_" in vocab_mode:
    full_vocab = collections.defaultdict(int)
    for sent in sents_data:
      for word in sent.split():
        full_vocab[word] += 1
    # Sort the vocabulary words according to their frequency
    full_vocab = sorted([(k, v) for k, v in full_vocab.items()],
                        key=lambda x: x[1],
                        reverse=True)
    # Take the top-k values from the vocabulary for the final list
    top_k_val = int(vocab_mode[len("full_corpus_top_"):])
    vocab = [x[0] for x in full_vocab[:top_k_val]]
    probs = [1.0 / len(vocab) for _ in vocab]

  elif vocab_mode == "downstream_vocab":
    vocab = collections.defaultdict(int)
    for sent in sents_data:
      for index in task_input_indices[task_name]:
        original_sent = sent.split("\t")[index].split()
        for word in original_sent:
          vocab[word] += 1
    vocab = [(k, v) for k, v in vocab.items()]
    vocab.sort(key=lambda x: x[1], reverse=True)
    vocab_total = sum([x[1] for x in vocab])
    probs = [float(x[1]) / vocab_total for x in vocab]
    vocab = [x[0] for x in vocab]

  else:
    probs = None
    vocab = None

  return vocab, probs
Example 14: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  with gfile.Open(FLAGS.questions_path, "r") as f:
    questions_data = json.loads(f.read())

  with gfile.Open(FLAGS.predictions_path, "r") as f:
    predictions_data = json.loads(f.read())

  counter = 0
  unanswerable = 0
  total = 0

  for instance in tqdm.tqdm(questions_data["data"]):
    for para in instance["paragraphs"]:
      para_text = para["context"]
      for qa in para["qas"]:
        answer_text = predictions_data[qa["id"]]
        total += 1
        if answer_text.strip():
          qa["is_impossible"] = False
          # Due to minor data processing issues, there are a few cases where
          # the predicted answer does not exist exactly in the paragraph text.
          # In this case, check if the first word of the answer is present in
          # the paragraph and approximate answer_start using it.
          if answer_text not in para_text:
            counter += 1
            # If even the first word is not in the paragraph, ignore this QA
            if answer_text.split()[0] not in para_text:
              continue
            else:
              # Approximate answer_start by the position of the first word
              qa["answers"] = [{
                  "text": answer_text,
                  "answer_start": para_text.index(answer_text.split()[0])
              }]
              continue
          # The usual case, where answer_text is exactly present in para_text
          qa["answers"] = [{
              "text": answer_text,
              "answer_start": para_text.index(answer_text)
          }]
        else:
          # This makes the output compatible with SQuAD 2.0
          unanswerable += 1
          qa["answers"] = []
          qa["is_impossible"] = True

  logging.info("%d / %d answers were unanswerable", unanswerable, total)
  logging.info("%d / %d answers didn't have an exact match", counter, total)

  with gfile.Open(FLAGS.output_path, "w") as f:
    f.write(json.dumps(questions_data))
Example 15: main
# Required import: from tensorflow.compat import v1 [as alias]
# Or: from tensorflow.compat.v1 import gfile [as alias]
def main(_):
  random.seed(FLAGS.random_seed)

  with gfile.Open(FLAGS.input_path, "r") as f:
    sents_data = json.loads(f.read())

  output_data = {"data": [], "version": FLAGS.version}

  # Find all the question IDs in the SQuAD dataset
  question_ids = []
  for instance in sents_data["data"]:
    for para in instance["paragraphs"]:
      for qa in para["qas"]:
        question_ids.append(qa["id"])

  # Randomly shuffle the question IDs, and keep a FLAGS.fraction share of them
  random.shuffle(question_ids)
  num_final_questions = int(round(len(question_ids) * FLAGS.fraction))
  question_ids = {x: 1 for x in question_ids[:num_final_questions]}

  # Preserve the original dataset structure and paragraphs; choose random
  # questions based on the question IDs which survived the filtering.
  for instance in tqdm.tqdm(sents_data["data"]):
    instance_data = {"title": instance["title"], "paragraphs": []}
    for para in instance["paragraphs"]:
      para_instance = {"context": para["context"], "qas": []}
      for qa in para["qas"]:
        # Only choose those questions which survived the filtering.
        if qa["id"] in question_ids:
          para_instance["qas"].append(qa)
      # Don't append paras with no QAs
      if para_instance["qas"]:
        instance_data["paragraphs"].append(para_instance)
    # Don't append instances with no paragraphs.
    if instance_data["paragraphs"]:
      output_data["data"].append(instance_data)

  # Count the total number of questions in the final, smaller dataset.
  total_questions = 0
  for instance in output_data["data"]:
    for para in instance["paragraphs"]:
      for qa in para["qas"]:
        total_questions += 1

  logging.info("Final dataset size = %d", total_questions)

  with gfile.Open(FLAGS.output_path, "w") as f:
    f.write(json.dumps(output_data))

  return