本文整理汇总了Python中allennlp.nn.util.move_to_device方法的典型用法代码示例。如果您正苦于以下问题：Python util.move_to_device方法的具体用法？Python util.move_to_device怎么用？Python util.move_to_device使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.nn.util的用法示例。
在下文中一共展示了util.move_to_device方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: predict_batch_instance
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    """
    Run the wrapped model on a batch of instances and return sanitized outputs.

    The instances are indexed with the model's vocabulary, turned into a tensor
    dict, moved to the model's prediction device, and fed through the model and
    its ``decode`` step with gradient tracking disabled.
    """
    net = self._model
    with torch.no_grad():
        device = net._get_prediction_device()
        batch = Batch(instances)
        batch.index_instances(net.vocab)
        tensors = util.move_to_device(batch.as_tensor_dict(), device)
        raw_outputs = net(**tensors)
        decoded = net.decode(raw_outputs)
    return sanitize(decoded)
示例2: batch_loss
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Run a forward pass over ``batch_group`` and return the resulting ``loss``.

    With multiple GPUs the whole group is dispatched through
    ``training_util.data_parallel``; otherwise the group must contain exactly
    one batch, which is moved to the first configured device.

    When ``for_training`` is ``True`` the model's regularization penalty is
    added to the loss, and a missing ``"loss"`` key raises ``RuntimeError``.
    When ``for_training`` is ``False`` and there is no ``"loss"`` key, ``None``
    is returned.
    """
    if not self._multiple_gpu:
        assert len(batch_group) == 1
        single_batch = nn_util.move_to_device(batch_group[0], self._cuda_devices[0])
        output_dict = self.model(**single_batch)
    else:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self.model.get_regularization_penalty()
    except KeyError:
        if not for_training:
            # Evaluation without a loss is allowed; signal it with None.
            return None
        raise RuntimeError(
            "The model you are trying to optimize does not contain a"
            " 'loss' key in the output of model.forward(inputs)."
        )
    return loss
示例3: predict
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def predict(self, batches):
    """
    Run each model in ``self.models`` on its paired batch and convert the results.

    Batches and models are consumed pairwise via ``zip``. Each batch is turned
    into a tensor dict and moved to GPU 0 when CUDA is available (CPU
    otherwise); the model's ``forward`` is then called without gradient
    tracking. The collected outputs are handed to ``self._convert``, and the
    elapsed time is printed when ``self.log`` is truthy.

    Returns the ``(preds, idx, error_probs)`` triple produced by ``self._convert``.
    """
    started = time()
    collected = []
    for batch, model in zip(batches, self.models):
        tensor_dict = batch.as_tensor_dict()
        device = 0 if torch.cuda.is_available() else -1
        on_device = util.move_to_device(tensor_dict, device)
        with torch.no_grad():
            collected.append(model.forward(**on_device))
    preds, idx, error_probs = self._convert(collected)
    finished = time()
    if self.log:
        print(f"Inference time {finished - started}")
    return preds, idx, error_probs
示例4: _make_embedder_input
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def _make_embedder_input(self, all_tokens: List[str]) -> Dict[str, torch.Tensor]:
    """
    Build the embedder input for `all_tokens`, with one entry per token indexer.

    Every token indexer configured on the predictor's dataset reader is used to
    turn the tokens into a `LongTensor`, to which a leading dimension of size 1
    is added with `unsqueeze(0)`. Each entry is keyed by the indexer name and
    holds a single-key dict (`"tokens"`, `"token_characters"` or `"elmo_tokens"`).

    # Parameters

    all_tokens : `List[str]`
        The token strings to index.

    # Returns

    The nested dict of index tensors after `util.move_to_device` has moved it
    to `self.cuda_device`.

    # Raises

    RuntimeError
        If an indexer is not one of the three supported types.
    """
    inputs = {}
    # A bit of a hack; this will only work with some dataset readers, but it'll do for now.
    indexers = self.predictor._dataset_reader._token_indexers  # type: ignore
    for indexer_name, token_indexer in indexers.items():
        if isinstance(token_indexer, SingleIdTokenIndexer):
            all_indices = [
                self.vocab._token_to_index[self.namespace][token] for token in all_tokens
            ]
            inputs[indexer_name] = {"tokens": torch.LongTensor(all_indices).unsqueeze(0)}
        elif isinstance(token_indexer, TokenCharactersIndexer):
            # The padding lengths come from the indexer itself; the previous
            # hand-computed `max_token_length` was never used and has been removed.
            tokens = [Token(x) for x in all_tokens]
            indexed_tokens = token_indexer.tokens_to_indices(tokens, self.vocab)
            padding_lengths = token_indexer.get_padding_lengths(indexed_tokens)
            padded_tokens = token_indexer.as_padded_tensor_dict(indexed_tokens, padding_lengths)
            inputs[indexer_name] = {
                "token_characters": torch.LongTensor(
                    padded_tokens["token_characters"]
                ).unsqueeze(0)
            }
        elif isinstance(token_indexer, ELMoTokenCharactersIndexer):
            # Index one token at a time and keep only that token's entry.
            elmo_tokens = [
                token_indexer.tokens_to_indices([Token(text=token)], self.vocab)["elmo_tokens"][0]
                for token in all_tokens
            ]
            inputs[indexer_name] = {"elmo_tokens": torch.LongTensor(elmo_tokens).unsqueeze(0)}
        else:
            raise RuntimeError("Unsupported token indexer:", token_indexer)
    return util.move_to_device(inputs, self.cuda_device)
示例5: batch_outputs
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def batch_outputs(self, batch: TensorDict, for_training: bool) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batch and returns the output dictionary that the model
    returns, after adding any specified regularization penalty to the loss (if training).

    When `for_training` is `True`, the penalty is stored under `"reg_loss"` and added
    in place to `"loss"`. A missing `"loss"` key then raises `RuntimeError`. When
    `for_training` is `False` the model output is returned untouched.
    """
    batch = nn_util.move_to_device(batch, self.cuda_device)
    output_dict = self._pytorch_model(**batch)

    if not for_training:
        return output_dict

    try:
        regularization_penalty = self.model.get_regularization_penalty()
        loss = output_dict["loss"]

        # Handle model without regularization: replace the scalar 0.0 with a
        # zero tensor created from the loss so "reg_loss" is always a tensor.
        if regularization_penalty == 0.0:
            regularization_penalty = loss.new_full(size=[], fill_value=0.0)

        output_dict["reg_loss"] = regularization_penalty
        output_dict["loss"] += regularization_penalty
    except KeyError:
        # We only get here while training, so the former nested
        # `if for_training:` guard was always true and has been dropped.
        raise RuntimeError(
            "The model you are trying to optimize does not contain a"
            " 'loss' key in the output of model.forward(inputs)."
        )

    return output_dict
示例6: span_eval
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def span_eval(model, data_iter, do_lower_case, fitem_dict, device_num, show_progress, pred_no_answer=True):
    """
    Evaluate a span model over ``data_iter`` and merge the predictions.

    ``fitem_dict`` is the original forward-item dict. Per-batch predictions are
    written into a fresh output dict by ``write_to_predicted_fitem``, and that
    dict is finally merged by ``merge_predicted_fitem_to_eitem``.

    Returns the ``(eitem_list, eval_dict)`` pair from the merge step.
    """
    predicted_fitems = {}

    with torch.no_grad():
        model.eval()

        progress = tqdm(enumerate(data_iter), disable=(not show_progress))
        for _, raw_batch in progress:
            batch = allen_util.move_to_device(raw_batch, device_num)

            input_ids = batch['paired_sequence']
            segment_ids = batch['paired_segments_ids']
            attention_mask, _ = torch_util.get_length_and_mask(input_ids)
            gold_span = batch['gt_span']

            start_logits, end_logits, context_length = model(
                mode=BertSpan.ForwardMode.EVAL,
                input_ids=input_ids,
                token_type_ids=segment_ids,
                attention_mask=attention_mask,
                gt_span=gold_span,
            )

            fids = batch['fid']
            uids = batch['uid']
            write_to_predicted_fitem(start_logits, end_logits, context_length, fids, uids, gold_span,
                                     fitem_dict, predicted_fitems, do_lower_case)

    eitem_list, eval_dict = merge_predicted_fitem_to_eitem(predicted_fitems, None, pred_no_answer=pred_no_answer)
    return eitem_list, eval_dict
示例7: write_for_official_eval
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    """
    Run an archived model over ``test_file`` and write predictions to ``output_file``.

    The model is loaded from ``model_archive_file``, moved to the GPU, and run in
    eval mode over batches of 4 instances. Each output line is
    ``"<index + 8001>\\t<prediction>"``, with the predictions taken from
    ``model.metrics[0].pred``.

    NOTE(review): the labels decoded through ``label_ids_to_label`` are collected
    but not written; the file content comes from the metric's ``pred`` attribute.
    Confirm that this is intended.
    """
    # Local import so this function is self-contained; only `no_grad` is needed.
    import torch

    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    # Inference only: skip building autograd graphs to save memory and time.
    with torch.no_grad():
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            batch = move_to_device(batch, cuda_device=0)
            output = model(**batch)

            batch_labels = [
                label_ids_to_label[i]
                for i in output['predictions'].cpu().numpy().tolist()
            ]
            predictions.extend(batch_labels)

    to_write = ''.join(["{}\t{}\n".format(i + 8001, e) for i, e in enumerate(model.metrics[0].pred)])
    with open(output_file, 'w') as fout:
        fout.write(to_write)
示例8: write_for_official_eval
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def write_for_official_eval(model_archive_file, test_file, output_file):
    """
    Run an archived model over ``test_file`` and write one ``T``/``F`` label per line.

    The model is loaded from ``model_archive_file``, moved to the GPU, and run in
    eval mode over batches of 32 instances. Predicted ids 0 and 1 are mapped to
    ``'F'`` and ``'T'``. The function asserts that exactly 1400 predictions were
    produced before writing them to ``output_file``.
    """
    # Local import so this function is self-contained; only `no_grad` is needed.
    import torch

    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 32}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    label_ids_to_label = {0: 'F', 1: 'T'}

    instances = reader.read(test_file)
    predictions = []
    # Inference only: skip building autograd graphs to save memory and time.
    with torch.no_grad():
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            batch = move_to_device(batch, cuda_device=0)
            output = model(**batch)

            batch_labels = [
                label_ids_to_label[i]
                for i in output['predictions'].cpu().numpy().tolist()
            ]
            predictions.extend(batch_labels)

    # The expected size of the official test set is hard-coded here.
    assert len(predictions) == 1400

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
示例9: write_for_official_eval
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    """
    Run an archived model over ``test_file`` and write one decoded label per line.

    The model is loaded from ``model_archive_file``, moved to the GPU, and run in
    eval mode over batches of 4 instances. Each predicted id is looked up in
    ``label_ids_to_label`` and the resulting labels are written to
    ``output_file`` in iteration order.
    """
    # Local import so this function is self-contained; only `no_grad` is needed.
    import torch

    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    # Inference only: skip building autograd graphs to save memory and time.
    with torch.no_grad():
        for batch in iterator(instances, num_epochs=1, shuffle=False):
            batch = move_to_device(batch, cuda_device=0)
            output = model(**batch)

            batch_labels = [
                label_ids_to_label[i]
                for i in output['predictions'].cpu().numpy().tolist()
            ]
            predictions.extend(batch_labels)

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
示例10: _batch_loss
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def _batch_loss(self, batch: torch.Tensor, for_training: bool, batch_aux: torch.Tensor=None) -> torch.Tensor:
    """
    Does a forward pass on the given batch and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.

    When ``batch_aux`` is given, a second forward pass is run on it and its loss,
    scaled by ``self._mixing_ratio``, is added to the main loss.

    Raises ``ConfigurationError`` if ``batch_aux`` is combined with multi-GPU
    training or if the auxiliary output has no ``"loss"`` key. Raises
    ``RuntimeError`` if the main output has no ``"loss"`` key while training.
    When not training and the main output has no ``"loss"``, the main loss is
    ``None``.
    """
    if self._multiple_gpu:
        # Fail fast: reject the unsupported combination before spending a
        # full data-parallel forward pass on it.
        if batch_aux is not None:
            raise ConfigurationError('multi-gpu not supported for multi-task training.')
        output_dict = self._data_parallel(batch)
    else:
        batch = util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self._model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self._model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError("The model you are trying to optimize does not contain a"
                               " 'loss' key in the output of model.forward(inputs).")
        loss = None

    if batch_aux is not None:
        batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
        output_dict_aux = self._model(**batch_aux)
        try:
            loss_aux = output_dict_aux["loss"]
            if for_training:
                # NOTE(review): the penalty is added to the auxiliary loss as
                # well as the main loss — confirm this double counting is intended.
                loss_aux += self._model.get_regularization_penalty()
        except KeyError:
            raise ConfigurationError("The auxilliary model you are trying to optimize does not contain a"
                                     " 'loss' key in the output of model.forward(inputs).")

        # multi-task loss
        # NOTE(review): `loss` is None here when not training and the main
        # output had no "loss" key, which makes this addition fail.
        loss = loss + self._mixing_ratio * loss_aux
    return loss
示例11: forward_on_instances
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def forward_on_instances(self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
    """
    Run the model on a list of `Instances` and return one output dict per instance.

    The instances are indexed with this model's `Vocabulary`, batched into tensors,
    moved to the prediction device, and passed through `self.forward()` and
    `self.make_output_human_readable()`. Any `torch.Tensor` in the output is
    converted to a numpy array, and every batched output is split along its first
    dimension. Outputs whose first dimension does not match the batch size cannot
    be split; they trigger `_maybe_warn_for_unseparable_batches` and are omitted.

    # Parameters

    instances : `List[Instance]`, required
        The instances to run the model on.

    # Returns

    A list of the models output for each instance.
    """
    expected_size = len(instances)
    with torch.no_grad():
        device = self._get_prediction_device()
        batch = Batch(instances)
        batch.index_instances(self.vocab)
        tensors = util.move_to_device(batch.as_tensor_dict(), device)
        outputs = self.make_output_human_readable(self(**tensors))

        per_instance: List[Dict[str, numpy.ndarray]] = [{} for _ in batch.instances]
        for name, value in list(outputs.items()):
            if isinstance(value, torch.Tensor):
                # 0-dim tensors are not iterable (e.g. the loss with batch size 1),
                # so give them a leading dimension before checking the size.
                if value.dim() == 0:
                    value = value.unsqueeze(0)
                splittable = value.size(0) == expected_size
                if splittable:
                    value = value.detach().cpu().numpy()
            else:
                splittable = len(value) == expected_size

            if not splittable:
                self._maybe_warn_for_unseparable_batches(name)
                continue

            for target, element in zip(per_instance, value):
                target[name] = element
        return per_instance
示例12: _first_order_taylor
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def _first_order_taylor(self, grad: numpy.ndarray, token_idx: torch.Tensor, sign: int) -> int:
    """
    Pick the replacement token that maximizes the first-order Taylor approximation
    of the loss, given the gradient `grad` and the current `token_idx`.

    The code is based on
    https://github.com/pmichel31415/translate/blob/paul/pytorch_translate/
    research/adversarial/adversaries/brute_force_adversary.py

    `sign` scales the score, so it decides whether the chosen token increases or
    decreases the approximation. Tokens listed in
    `self.invalid_replacement_indices` are never selected.

    Raises `NotImplementedError` if `token_idx` is not a 0-dim tensor.
    """
    grad_tensor = util.move_to_device(torch.from_numpy(grad), self.cuda_device)
    if token_idx.size() != ():
        # We've got an encoder that only has character ids as input. We don't currently
        # handle this case, and it's not clear it's worth it to implement it. We'll at
        # least give a nicer error than some pytorch dimension mismatch.
        raise NotImplementedError(
            "You are using a character-level indexer with no other indexers. This case is not "
            "currently supported for hotflip. If you would really like to see us support "
            "this, please open an issue on github."
        )
    if token_idx >= self.embedding_matrix.size(0):
        # This happens when we've truncated our fake embedding matrix. We need to do a dot
        # product with the word vector of the current token; if that token is out of
        # vocabulary for our truncated matrix, we need to run it through the embedding layer.
        token_text = self.vocab.get_token_from_index(token_idx)
        embedder_input = self._make_embedder_input([token_text])
        current_embedding = self.embedding_layer(embedder_input)[0]
    else:
        index_tensor = util.move_to_device(torch.LongTensor([token_idx]), self.cuda_device)
        current_embedding = torch.nn.functional.embedding(index_tensor, self.embedding_matrix)
    current_embedding = current_embedding.detach().unsqueeze(0)
    grad_tensor = grad_tensor.unsqueeze(0).unsqueeze(0)
    # solves equation (3) here https://arxiv.org/abs/1903.06620
    candidate_dot_grad = torch.einsum("bij,kj->bik", (grad_tensor, self.embedding_matrix))
    current_dot_grad = torch.einsum("bij,bij->bi", (grad_tensor, current_embedding)).unsqueeze(-1)
    scores = sign * (current_dot_grad - candidate_dot_grad)
    scores = scores.detach().cpu().numpy()
    # Do not replace with non-alphanumeric tokens
    scores[:, :, self.invalid_replacement_indices] = -numpy.inf
    best_at_each_step = scores.argmax(2)
    return best_at_each_step[0].data[0]
示例13: get_gradients
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def get_gradients(self, instances: List[Instance]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Gets the gradients of the loss with respect to the model inputs.

    # Parameters

    instances : `List[Instance]`

    # Returns

    `Tuple[Dict[str, Any], Dict[str, Any]]`
        The first item is a Dict of gradient entries for each input.
        The keys have the form `{grad_input_1: ..., grad_input_2: ... }`
        up to the number of inputs given. The second item is the model's output.

    # Notes

    Batches and indexes the given instances, registers hooks on the embedding
    layer of the model, and sends the batch through the model's `forward`
    function. It then calls `backward` on the loss and removes the hooks.
    Every parameter temporarily gets `requires_grad = True`. The hooks are
    removed and the original `requires_grad` values restored even when the
    forward or backward pass raises.
    """
    # set requires_grad to true for all parameters, but save original values to
    # restore them later
    original_param_name_to_requires_grad_dict = {}
    for param_name, param in self._model.named_parameters():
        original_param_name_to_requires_grad_dict[param_name] = param.requires_grad
        param.requires_grad = True

    embedding_gradients: List[Tensor] = []
    hooks: List[RemovableHandle] = []
    try:
        hooks = self._register_embedding_gradient_hooks(embedding_gradients)

        dataset = Batch(instances)
        dataset.index_instances(self._model.vocab)
        dataset_tensor_dict = util.move_to_device(dataset.as_tensor_dict(), self.cuda_device)

        # To bypass "RuntimeError: cudnn RNN backward can only be called in training mode"
        with backends.cudnn.flags(enabled=False):
            outputs = self._model.make_output_human_readable(
                self._model.forward(**dataset_tensor_dict)  # type: ignore
            )

            loss = outputs["loss"]
            self._model.zero_grad()
            loss.backward()
    finally:
        # Always undo the temporary changes so a failed pass cannot leave
        # stale hooks on the model or parameters stuck with requires_grad=True.
        for hook in hooks:
            hook.remove()

        # restore the original requires_grad values of the parameters
        for param_name, param in self._model.named_parameters():
            param.requires_grad = original_param_name_to_requires_grad_dict[param_name]

    grad_dict = dict()
    for idx, grad in enumerate(embedding_gradients):
        key = "grad_input_" + str(idx + 1)
        grad_dict[key] = grad.detach().cpu().numpy()

    return grad_dict, outputs
示例14: run_evaluation
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def run_evaluation(evaluation_file, model_archive,
                   random_candidates=False):
    """
    Evaluate an archived model on ``evaluation_file`` and print its metrics.

    The model is loaded from ``model_archive``, its multitask flags are switched
    off, it is moved to the GPU in eval mode, and gradients are disabled on all
    parameters. For a ``multitask_reader`` config the ``language_modeling``
    sub-reader is used. With ``random_candidates=True`` every entity candidate
    generator config gets ``random_candidates = True``.

    Metrics are printed every 100 batches and once more at the end.
    """
    archive = load_archive(model_archive)
    model = archive.model
    vocab = model.vocab
    params = archive.config

    model.multitask = False
    model.multitask_kg = False
    model.cuda()
    model.eval()
    for parameter in model.parameters():
        parameter.requires_grad_(False)

    reader_params = params.pop('dataset_reader')
    if reader_params['type'] == 'multitask_reader':
        reader_params = reader_params['dataset_readers']['language_modeling']

    if random_candidates:
        tokenizer_config = reader_params['base_reader']['tokenizer_and_candidate_generator']
        for _, generator_config in tokenizer_config['entity_candidate_generators'].items():
            generator_config['random_candidates'] = True

    reader = DatasetReader.from_params(Params(reader_params))

    bucket_config = {
        "type": "bucket",
        "batch_size": 32,
        "sorting_keys": [["tokens", "num_tokens"]],
        "max_instances_in_memory": 2500,
    }
    iterator_config = {
        "type": "self_attn_bucket",
        "batch_size_schedule": "base-11gb-fp32",
        "iterator": bucket_config,
    }
    iterator = DataIterator.from_params(Params(iterator_config))
    iterator.index_with(vocab)
    instances = reader.read(evaluation_file)

    for batch_no, batch in enumerate(tqdm.tqdm(iterator(instances, num_epochs=1))):
        on_device = move_to_device(batch, 0)
        # The forward pass is run for its effect on the model's metrics.
        model(**on_device)
        if batch_no % 100 == 0:
            print(model.get_metrics())

    print(model.get_metrics())
示例15: _batch_loss
# 需要导入模块: from allennlp.nn import util [as 别名]
# 或者: from allennlp.nn.util import move_to_device [as 别名]
def _batch_loss(self, batch: torch.Tensor,
                for_training: bool,
                batch_aux: torch.Tensor=None,
                batch_aux2: torch.Tensor=None) -> torch.Tensor:
    """
    Does a forward pass on the given batch and auxiliary data batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.

    The auxiliary losses are only used when both ``batch_aux`` and ``batch_aux2``
    are given. They are then scaled by ``self._mixing_ratio`` and
    ``self._mixing_ratio2`` and added to the main loss.

    Raises ``ConfigurationError`` if ``batch_aux`` is combined with multi-GPU
    training or if an auxiliary output has no ``"loss"`` key. Raises
    ``RuntimeError`` if the main output has no ``"loss"`` key while training.
    """
    if self._multiple_gpu:
        # Fail fast: reject the unsupported combination before spending a
        # full data-parallel forward pass on it.
        if batch_aux is not None:
            raise ConfigurationError('multi-gpu not supported for multi-task training.')
        output_dict = self._data_parallel(batch)
    else:
        batch = util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self._model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self._model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError("The model you are trying to optimize does not contain a"
                               " 'loss' key in the output of model.forward(inputs).")
        loss = None

    # NOTE(review): if only one of the two auxiliary batches is supplied it is
    # silently ignored — confirm this is intended.
    if batch_aux is not None and batch_aux2 is not None:
        batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
        batch_aux2 = util.move_to_device(batch_aux2, self._cuda_devices[0])
        output_dict_aux = self._model(**batch_aux)
        output_dict_aux2 = self._model(**batch_aux2)
        try:
            loss_aux = output_dict_aux["loss"]
            loss_aux2 = output_dict_aux2["loss"]
            if for_training:
                loss_aux += self._model.get_regularization_penalty()
                loss_aux2 += self._model.get_regularization_penalty()
        except KeyError:
            raise ConfigurationError("The auxiliary model you are trying to optimize does not contain a"
                                     " 'loss' key in the output of model.forward(inputs).")

        # multi-task loss
        # NOTE(review): `loss` is None here when not training and the main
        # output had no "loss" key, which makes this addition fail.
        loss = loss + self._mixing_ratio * loss_aux + self._mixing_ratio2 * loss_aux2
    return loss