This article collects typical usage examples of the Python method torch.Tensor.view. If you are unsure what Tensor.view does, how to call it, or where it shows up in real code, the curated examples below should help. You may also want to explore the other methods of the torch.Tensor class.
The following presents 15 code examples of Tensor.view, sorted by popularity by default.
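Before the examples, a minimal standalone sketch of what Tensor.view actually does (toy shapes only; nothing from the examples below is assumed): it reinterprets a tensor's underlying storage under a new shape without copying, a single dimension may be given as -1 to be inferred, and the tensor must be contiguous enough for the reinterpretation to be valid.

import torch

x = torch.arange(24)               # shape (24,)
a = x.view(2, 3, 4)                # reinterpret as (2, 3, 4); no data copy
b = x.view(2, -1)                  # -1 infers the remaining size: (2, 12)
assert a.shape == (2, 3, 4) and b.shape == (2, 12)

# A view shares storage with the original tensor.
b[0, 0] = 100
assert x[0].item() == 100

# A non-contiguous tensor (e.g. after transpose) cannot be viewed directly;
# call .contiguous() first, or use .reshape(), which copies when needed.
t = a.transpose(0, 2)              # shape (4, 3, 2), non-contiguous
flat = t.contiguous().view(-1)

The examples below lean on exactly these semantics, occasionally calling .contiguous() before .view() (see Examples 12 and 13).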
Example 1: __call__
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
def __call__(self,
             predictions: torch.Tensor,
             gold_labels: torch.Tensor,
             mask: Optional[torch.Tensor] = None):
    """
    Parameters
    ----------
    predictions : ``torch.Tensor``, required.
        A tensor of predictions of shape (batch_size, ...).
    gold_labels : ``torch.Tensor``, required.
        A tensor of the same shape as ``predictions``.
    mask : ``torch.Tensor``, optional (default = None).
        A tensor of the same shape as ``predictions``.
    """
    predictions, gold_labels, mask = self.unwrap_to_tensors(predictions, gold_labels, mask)
    if mask is not None:
        # We can multiply by the mask up front, because we're just checking equality below,
        # and this way everything that's masked will be equal.
        predictions = predictions * mask
        gold_labels = gold_labels * mask
    batch_size = predictions.size(0)
    predictions = predictions.view(batch_size, -1)
    gold_labels = gold_labels.view(batch_size, -1)
    # The .prod() here is functioning as a logical AND.
    correct = predictions.eq(gold_labels).prod(dim=1).float()
    count = torch.ones(gold_labels.size(0))
    self._correct_count += correct.sum()
    self._total_count += count.sum()
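To see why the two view calls matter, here is a hedged, self-contained replay of the pipeline above on toy data (the surrounding class, unwrap_to_tensors, and the running counters are assumed away):

import torch

predictions = torch.tensor([[1, 2, 3], [4, 0, 0]])
gold_labels = torch.tensor([[1, 2, 3], [4, 9, 9]])
mask = torch.tensor([[1, 1, 1], [1, 0, 0]])

# Masked positions compare equal because both sides are zeroed out.
p = (predictions * mask).view(predictions.size(0), -1)
g = (gold_labels * mask).view(gold_labels.size(0), -1)

# .prod(dim=1) acts as a logical AND across each flattened instance
# (cast to long first, since prod is not defined for bool tensors).
correct = p.eq(g).long().prod(dim=1).float()
print(correct)  # tensor([1., 1.]) -- both rows match once masked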
Example 2: __call__
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
def __call__(self,
             predictions: torch.Tensor,
             gold_labels: torch.Tensor,
             mask: Optional[torch.Tensor] = None):
    """
    Parameters
    ----------
    predictions : ``torch.Tensor``, required.
        A tensor of predictions of shape (batch_size, ...).
    gold_labels : ``torch.Tensor``, required.
        A tensor of the same shape as ``predictions``.
    mask : ``torch.Tensor``, optional (default = None).
        A tensor of the same shape as ``predictions``.
    """
    predictions, gold_labels, mask = self.unwrap_to_tensors(predictions, gold_labels, mask)
    # Flatten predictions, gold_labels, and mask. We calculate the covariance between
    # the vectors, since each element in the predictions and gold_labels tensors is assumed
    # to be a separate observation.
    predictions = predictions.view(-1)
    gold_labels = gold_labels.view(-1)
    if mask is not None:
        mask = mask.view(-1)
        predictions = predictions * mask
        gold_labels = gold_labels * mask
        num_batch_items = torch.sum(mask).item()
    else:
        num_batch_items = gold_labels.numel()
    # Note that self._total_count must be a float or an int at all times: if it
    # were a 1-dimensional Tensor, previous_count would alias it and end up equal
    # to updated_count. The same applies to previous_total_prediction_mean and
    # previous_total_label_mean below -- we handle this in the code by calling
    # .item() judiciously.
    previous_count = self._total_count
    updated_count = self._total_count + num_batch_items
    batch_mean_prediction = torch.sum(predictions) / num_batch_items
    delta_mean_prediction = ((batch_mean_prediction - self._total_prediction_mean) *
                             num_batch_items) / updated_count
    previous_total_prediction_mean = self._total_prediction_mean
    self._total_prediction_mean += delta_mean_prediction.item()
    batch_mean_label = torch.sum(gold_labels) / num_batch_items
    delta_mean_label = ((batch_mean_label - self._total_label_mean) * num_batch_items) / updated_count
    previous_total_label_mean = self._total_label_mean
    self._total_label_mean += delta_mean_label.item()
    batch_coresiduals = (predictions - batch_mean_prediction) * (gold_labels - batch_mean_label)
    if mask is not None:
        batch_co_moment = torch.sum(batch_coresiduals * mask)
    else:
        batch_co_moment = torch.sum(batch_coresiduals)
    delta_co_moment = (
        batch_co_moment + (previous_total_prediction_mean - batch_mean_prediction) *
        (previous_total_label_mean - batch_mean_label) *
        (previous_count * num_batch_items / updated_count))
    self._total_co_moment += delta_co_moment.item()
    self._total_count = updated_count
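The delta_co_moment expression is the standard pairwise-update formula (Chan et al.) for combining the co-moments of two data chunks. A hedged standalone check that the streaming update matches a direct covariance computation, with plain variables standing in for the class's running state:

import torch

torch.manual_seed(0)
preds, labels = torch.randn(100), torch.randn(100)

count, mean_p, mean_y, co_moment = 0.0, 0.0, 0.0, 0.0
for chunk in torch.split(torch.stack([preds, labels]), 10, dim=1):
    p, y = chunk[0], chunk[1]
    n = p.numel()
    updated = count + n
    bp, by = p.mean().item(), y.mean().item()
    prev_mp, prev_my = mean_p, mean_y
    mean_p += (bp - mean_p) * n / updated
    mean_y += (by - mean_y) * n / updated
    # batch co-moment plus the cross-chunk correction term
    co_moment += ((p - bp) * (y - by)).sum().item() \
        + (prev_mp - bp) * (prev_my - by) * (count * n / updated)
    count = updated

# co_moment / count should match the population covariance computed in one shot.
direct = ((preds - preds.mean()) * (labels - labels.mean())).mean().item()
assert abs(co_moment / count - direct) < 1e-3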
Example 3: forward
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
import torch
def forward(self, x: torch.Tensor) -> torch.Tensor:
    if self.rf == 1:
        size_out = x.size()[:-1] + (self.nf,)
        x = torch.addmm(self.b, x.view(-1, x.size(-1)), self.w)
        x = x.view(*size_out)
    else:
        raise NotImplementedError
    return x
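This is the position-wise linear ("Conv1D") pattern from the OpenAI GPT code: the first view collapses every leading dimension so a single addmm (bias + matrix multiply) applies the projection, and the second view restores them. A sketch with assumed toy shapes standing in for self.b, self.w, and self.nf:

import torch

batch, seq, nx, nf = 2, 5, 8, 16
x = torch.randn(batch, seq, nx)
w = torch.randn(nx, nf)        # stand-in for self.w
b = torch.zeros(nf)            # stand-in for self.b

size_out = x.size()[:-1] + (nf,)               # (batch, seq, nf)
y = torch.addmm(b, x.view(-1, x.size(-1)), w)  # (batch*seq, nf)
y = y.view(*size_out)                          # back to (batch, seq, nf)

# Equivalent to a broadcasted batched matmul:
assert torch.allclose(y, x @ w + b, atol=1e-5)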
Example 4: __call__
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
from allennlp.common.checks import ConfigurationError
def __call__(self,
             predictions: torch.Tensor,
             gold_labels: torch.Tensor,
             mask: Optional[torch.Tensor] = None):
    """
    Parameters
    ----------
    predictions : ``torch.Tensor``, required.
        A tensor of predictions of shape (batch_size, ..., num_classes).
    gold_labels : ``torch.Tensor``, required.
        A tensor of integer class labels of shape (batch_size, ...). It must be the same
        shape as the ``predictions`` tensor without the ``num_classes`` dimension.
    mask : ``torch.Tensor``, optional (default = None).
        A masking tensor the same size as ``gold_labels``.
    """
    predictions, gold_labels, mask = self.unwrap_to_tensors(predictions, gold_labels, mask)
    # Some sanity checks.
    num_classes = predictions.size(-1)
    if gold_labels.dim() != predictions.dim() - 1:
        raise ConfigurationError("gold_labels must have dimension == predictions.dim() - 1 but "
                                 "found tensor of shape: {}".format(predictions.size()))
    if (gold_labels >= num_classes).any():
        raise ConfigurationError("A gold label passed to Categorical Accuracy contains an id >= {}, "
                                 "the number of classes.".format(num_classes))
    predictions = predictions.view((-1, num_classes))
    gold_labels = gold_labels.view(-1).long()
    if not self._tie_break:
        # Top K indexes of the predictions (or fewer, if there aren't K of them).
        # Special case topk == 1, because it's common and .max() is much faster than .topk().
        if self._top_k == 1:
            top_k = predictions.max(-1)[1].unsqueeze(-1)
        else:
            top_k = predictions.topk(min(self._top_k, predictions.shape[-1]), -1)[1]
        # This is of shape (batch_size, ..., top_k).
        correct = top_k.eq(gold_labels.unsqueeze(-1)).float()
    else:
        # A prediction is correct if the gold label falls on any of the max scores;
        # the score is then distributed across the tie_counts.
        max_predictions = predictions.max(-1)[0]
        max_predictions_mask = predictions.eq(max_predictions.unsqueeze(-1))
        # max_predictions_mask is (rows x num_classes) and gold_labels is (batch_size,);
        # the ith entry of gold_labels is an index (0..num_classes - 1) into the ith row
        # of max_predictions_mask. For each row, check whether the index pointed to by
        # the gold label was among the max-scored classes.
        correct = max_predictions_mask[torch.arange(gold_labels.numel()).long(), gold_labels].float()
        tie_counts = max_predictions_mask.sum(-1)
        correct /= tie_counts.float()
        correct.unsqueeze_(-1)
    if mask is not None:
        correct *= mask.view(-1, 1).float()
        self.total_count += mask.sum()
    else:
        self.total_count += gold_labels.numel()
    self.correct_count += correct.sum()
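A quick standalone replay of the top-1 branch (toy numbers; the metric's counters and the tie-break branch are left out):

import torch

predictions = torch.tensor([[[0.1, 0.9], [0.8, 0.2]],
                            [[0.3, 0.7], [0.6, 0.4]]])  # (batch=2, seq=2, num_classes=2)
gold_labels = torch.tensor([[1, 0], [0, 0]])            # (batch=2, seq=2)

num_classes = predictions.size(-1)
flat_preds = predictions.view((-1, num_classes))        # (4, 2): one row per prediction
flat_gold = gold_labels.view(-1).long()                 # (4,)

top_1 = flat_preds.max(-1)[1].unsqueeze(-1)             # argmax index per row
correct = top_1.eq(flat_gold.unsqueeze(-1)).float()
print(correct.sum() / flat_gold.numel())                # tensor(0.7500)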
Example 5: _construct_loss
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Tuple

import torch
def _construct_loss(self,
                    arc_scores: torch.Tensor,
                    arc_tag_logits: torch.Tensor,
                    arc_tags: torch.Tensor,
                    mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Computes the arc and tag loss for an adjacency matrix.

    Parameters
    ----------
    arc_scores : ``torch.Tensor``, required.
        A tensor of shape (batch_size, sequence_length, sequence_length) used to generate a
        binary classification decision for whether an edge is present between two words.
    arc_tag_logits : ``torch.Tensor``, required.
        A tensor of shape (batch_size, sequence_length, sequence_length, num_tags) used to
        generate a distribution over edge tags for a given edge.
    arc_tags : ``torch.Tensor``, required.
        A tensor of shape (batch_size, sequence_length, sequence_length).
        The labels for every arc.
    mask : ``torch.Tensor``, required.
        A mask of shape (batch_size, sequence_length), denoting unpadded
        elements in the sequence.

    Returns
    -------
    arc_nll : ``torch.Tensor``, required.
        The negative log likelihood from the arc loss.
    tag_nll : ``torch.Tensor``, required.
        The negative log likelihood from the arc tag loss.
    """
    float_mask = mask.float()
    arc_indices = (arc_tags != -1).float()
    # Make the arc tags not have negative values anywhere
    # (by default, no edge is indicated with -1).
    arc_tags = arc_tags * arc_indices
    arc_nll = self._arc_loss(arc_scores, arc_indices) * float_mask.unsqueeze(1) * float_mask.unsqueeze(2)
    # We want the mask for the tags to only include the unmasked words
    # and we only care about the loss with respect to the gold arcs.
    tag_mask = float_mask.unsqueeze(1) * float_mask.unsqueeze(2) * arc_indices
    batch_size, sequence_length, _, num_tags = arc_tag_logits.size()
    original_shape = [batch_size, sequence_length, sequence_length]
    reshaped_logits = arc_tag_logits.view(-1, num_tags)
    reshaped_tags = arc_tags.view(-1)
    tag_nll = self._tag_loss(reshaped_logits, reshaped_tags.long()).view(original_shape) * tag_mask
    valid_positions = tag_mask.sum()
    arc_nll = arc_nll.sum() / valid_positions.float()
    tag_nll = tag_nll.sum() / valid_positions.float()
    return arc_nll, tag_nll
Example 6: split_heads
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
import torch
def split_heads(self, x: torch.Tensor, k: bool = False):
    new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head)
    x = x.view(*new_x_shape)  # in Tensorflow implem: fct split_states
    if k:
        return x.permute(0, 2, 3, 1)
    else:
        return x.permute(0, 2, 1, 3)
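What that view + permute pair produces, on assumed toy shapes (a plain variable stands in for self.n_head): keys come back pre-transposed, so attention scores are a bare matmul.

import torch

batch, seq, d_model, n_head = 2, 7, 12, 3
x = torch.randn(batch, seq, d_model)

new_shape = x.size()[:-1] + (n_head, d_model // n_head)
h = x.view(*new_shape)         # (batch, seq, n_head, head_dim)

q = h.permute(0, 2, 1, 3)      # queries/values: (batch, n_head, seq, head_dim)
k = h.permute(0, 2, 3, 1)      # keys: (batch, n_head, head_dim, seq)

scores = q @ k                 # attention logits: (batch, n_head, seq, seq)
assert scores.shape == (2, 3, 7, 7)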
Example 7: get_best_candidates
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
import torch
from torch import Tensor
def get_best_candidates(batch_candidates: Tensor, batch_values: Tensor) -> Tensor:
    r"""Extract best (q-batch) candidate from batch of candidates

    Args:
        batch_candidates: A `b x q x d` tensor of `b` q-batch candidates, or a
            `b x d` tensor of `b` single-point candidates.
        batch_values: A tensor with `b` elements containing the value of the
            respective candidate (higher is better).

    Returns:
        A tensor of size `q x d` (if q-batch mode) or `d` from batch_candidates
        with the highest associated value.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_scipy(
        >>>     initial_conditions=Xinit,
        >>>     acquisition_function=qEI,
        >>>     lower_bounds=bounds[0],
        >>>     upper_bounds=bounds[1],
        >>> )
        >>> best_candidates = get_best_candidates(batch_candidates, batch_acq_values)
    """
    best = torch.max(batch_values.view(-1), dim=0)[1].item()
    return batch_candidates[best]
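A toy run of the selection logic, with random stand-ins instead of a fitted BoTorch model (shapes follow the docstring):

import torch

batch_candidates = torch.randn(4, 3, 2)            # b=4 restarts, q=3, d=2
batch_values = torch.tensor([0.1, 0.7, 0.3, 0.5])  # one value per restart

best = torch.max(batch_values.view(-1), dim=0)[1].item()
best_candidates = batch_candidates[best]           # the q x d batch with the best value
assert best == 1 and best_candidates.shape == (3, 2)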
Example 8: __call__
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
def __call__(self,
             predictions: torch.Tensor,
             gold_labels: torch.Tensor,
             mask: Optional[torch.Tensor] = None):
    """
    Parameters
    ----------
    predictions : ``torch.Tensor``, required.
        A tensor of predictions of shape (batch_size, ...).
    gold_labels : ``torch.Tensor``, required.
        A tensor of the same shape as ``predictions``.
    mask : ``torch.Tensor``, optional (default = None).
        A tensor of the same shape as ``predictions``.
    """
    predictions, gold_labels, mask = self.unwrap_to_tensors(predictions, gold_labels, mask)
    batch_size = predictions.size(0)
    if mask is not None:
        # We can multiply by the mask up front, because we're just checking equality below,
        # and this way everything that's masked will be equal.
        predictions = predictions * mask
        gold_labels = gold_labels * mask
        # We want to skip predictions that are completely masked;
        # so we'll keep predictions that aren't.
        keep = mask.view(batch_size, -1).max(dim=1)[0].float()
    else:
        keep = torch.ones(batch_size).float()
    predictions = predictions.view(batch_size, -1)
    gold_labels = gold_labels.view(batch_size, -1)
    # At this point, predictions is (batch_size, rest_of_dims_combined),
    # so .eq -> .prod will be 1 if every element of the instance prediction is correct
    # and 0 if at least one element of the instance prediction is wrong.
    # Because of how we're handling masking, masked positions are automatically "correct".
    correct = predictions.eq(gold_labels).prod(dim=1).float()
    # Since masked positions are correct, we need to explicitly exclude instance predictions
    # where the entire prediction is masked (because they look "correct").
    self._correct_count += (correct * keep).sum()
    self._total_count += keep.sum()
Example 9: forward
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Dict, List, Union

import torch
from allennlp.nn.util import remove_sentence_boundaries
def forward(self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor) -> Dict[str, Union[torch.Tensor, List[torch.Tensor]]]:
    """
    Parameters
    ----------
    inputs : ``torch.autograd.Variable``
        Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.
        We also accept tensors with additional optional dimensions:
        ``(batch_size, dim0, dim1, ..., dimn, timesteps, 50)``

    Returns
    -------
    Dict with keys:

    ``'elmo_representations'`` : ``List[torch.autograd.Variable]``
        A ``num_output_representations`` list of ELMo representations for the input sequence.
        Each representation is shape ``(batch_size, timesteps, embedding_dim)``
    ``'mask'`` : ``torch.autograd.Variable``
        Shape ``(batch_size, timesteps)`` long tensor with sequence mask.
    """
    # reshape the input if needed
    original_shape = inputs.size()
    timesteps, num_characters = original_shape[-2:]
    if len(original_shape) > 3:
        reshaped_inputs = inputs.view(-1, timesteps, num_characters)
    else:
        reshaped_inputs = inputs
    # run the biLM
    bilm_output = self._elmo_lstm(reshaped_inputs)
    layer_activations = bilm_output['activations']
    mask_with_bos_eos = bilm_output['mask']
    # compute the elmo representations
    representations = []
    for i in range(len(self._scalar_mixes)):
        scalar_mix = getattr(self, 'scalar_mix_{}'.format(i))
        representation_with_bos_eos = scalar_mix(layer_activations, mask_with_bos_eos)
        representation_without_bos_eos, mask_without_bos_eos = remove_sentence_boundaries(
            representation_with_bos_eos, mask_with_bos_eos
        )
        representations.append(self._dropout(representation_without_bos_eos))
    # reshape if necessary
    if len(original_shape) > 3:
        mask = mask_without_bos_eos.view(original_shape[:-1])
        elmo_representations = [representation.view(original_shape[:-1] + (-1, ))
                                for representation in representations]
    else:
        mask = mask_without_bos_eos
        elmo_representations = representations
    return {'elmo_representations': elmo_representations, 'mask': mask}
Example 10: batched_index_select
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
def batched_index_select(target: torch.Tensor,
                         indices: torch.LongTensor,
                         flattened_indices: Optional[torch.LongTensor] = None) -> torch.Tensor:
    """
    The given ``indices`` of size ``(batch_size, d_1, ..., d_n)`` indexes into the sequence
    dimension (dimension 2) of the target, which has size ``(batch_size, sequence_length,
    embedding_size)``.

    This function returns selected values in the target with respect to the provided indices,
    which have size ``(batch_size, d_1, ..., d_n, embedding_size)``. This can use the optionally
    precomputed ``flattened_indices`` with size ``(batch_size * d_1 * ... * d_n)`` if given.

    An example use case of this function is looking up the start and end indices of spans in a
    sequence tensor. This is used in the
    :class:`~allennlp.models.coreference_resolution.CoreferenceResolver` model to select
    contextual word representations corresponding to the start and end indices of mentions. The
    key reason this can't be done with basic torch functions is that we want to be able to use
    look-up tensors with an arbitrary number of dimensions (for example, in the coref model, we
    don't know a priori how many spans we are looking up).

    Parameters
    ----------
    target : ``torch.Tensor``, required.
        A 3 dimensional tensor of shape (batch_size, sequence_length, embedding_size).
        This is the tensor to be indexed.
    indices : ``torch.LongTensor``
        A tensor of shape (batch_size, ...), where each element is an index into the
        ``sequence_length`` dimension of the ``target`` tensor.
    flattened_indices : ``Optional[torch.Tensor]``, optional (default = None)
        An optional tensor representing the result of calling
        :func:`~flatten_and_batch_shift_indices` on ``indices``. This is helpful in the case
        that the indices can be flattened once and cached for many batch lookups.

    Returns
    -------
    selected_targets : ``torch.Tensor``
        A tensor with shape [indices.size(), target.size(-1)] representing the embedded indices
        extracted from the batch flattened target tensor.
    """
    if flattened_indices is None:
        # Shape: (batch_size * d_1 * ... * d_n)
        flattened_indices = flatten_and_batch_shift_indices(indices, target.size(1))
    # Shape: (batch_size * sequence_length, embedding_size)
    flattened_target = target.view(-1, target.size(-1))
    # Shape: (batch_size * d_1 * ... * d_n, embedding_size)
    flattened_selected = flattened_target.index_select(0, flattened_indices)
    selected_shape = list(indices.size()) + [target.size(-1)]
    # Shape: (batch_size, d_1, ..., d_n, embedding_size)
    selected_targets = flattened_selected.view(*selected_shape)
    return selected_targets
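A self-contained sketch of the flatten-and-select trick. The helper flatten_and_batch_shift_indices is not reproduced here; the inline offset computation below is an assumption about its observable behavior (each batch's indices are shifted by batch_index * sequence_length so they address the flattened target):

import torch

target = torch.arange(24, dtype=torch.float).view(2, 4, 3)  # (batch=2, seq=4, emb=3)
indices = torch.tensor([[0, 3], [1, 2]])                    # two lookups per batch item

# Shift each batch's indices so they address rows of the flattened target.
offsets = torch.arange(2).unsqueeze(1) * target.size(1)     # [[0], [4]]
flattened_indices = (indices + offsets).view(-1)            # tensor([0, 3, 5, 6])

flattened_target = target.view(-1, target.size(-1))         # (8, 3)
selected = flattened_target.index_select(0, flattened_indices).view(2, 2, 3)

# Row 0 of batch 1 should be target[1, indices[1, 0]] == target[1, 1].
assert torch.equal(selected[1, 0], target[1, 1])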
Example 11: multioutput_to_batch_mode_transform
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional, Tuple

import torch
from torch import Tensor
def multioutput_to_batch_mode_transform(
    train_X: Tensor,
    train_Y: Tensor,
    num_outputs: int,
    train_Yvar: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor, Optional[Tensor]]:
    r"""Transforms training inputs for a multi-output model.

    Used for multi-output models that internally are represented by a
    batched single output model, where each output is modeled as an
    independent batch.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of training
            features.
        train_Y: A `n x (o)` or `batch_shape x n x (o)` (batch mode) tensor of
            training observations.
        num_outputs: number of outputs
        train_Yvar: A `batch_shape x n x (o)` tensor of observed measurement noise.

    Returns:
        3-element tuple containing

        - A `(o) x batch_shape x n x d` tensor of training features.
        - A `(o) x batch_shape x n` tensor of training observations.
        - A `(o) x batch_shape x n` tensor of observed measurement noise.
    """
    input_batch_shape = train_X.shape[:-2]
    if num_outputs > 1:
        # make train_Y `o x batch_shape x n`
        train_Y = train_Y.permute(-1, *range(train_Y.dim() - 1))
        # expand train_X to `o x batch_shape x n x d`
        train_X = train_X.unsqueeze(0).expand(
            torch.Size([num_outputs] + [-1] * train_X.dim())
        )
        if train_Yvar is not None:
            # make train_Yvar `o x batch_shape x n`
            train_Yvar = train_Yvar.permute(-1, *range(train_Yvar.dim() - 1))
    elif train_Y.dim() > 1:
        # single output, make train_Y `batch_shape x n`
        target_shape = input_batch_shape + torch.Size([-1])
        train_Y = train_Y.view(target_shape)
        if train_Yvar is not None:
            # make train_Yvar `batch_shape x n`
            train_Yvar = train_Yvar.view(target_shape)
    return train_X, train_Y, train_Yvar
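A shape check for the two branches on toy tensors (the transforms are replayed inline, outside the function):

import torch

n, d, o = 5, 3, 2
train_X = torch.randn(n, d)
train_Y = torch.randn(n, o)

# Multi-output branch: the output dimension becomes the leading batch dimension.
Y_t = train_Y.permute(-1, *range(train_Y.dim() - 1))
X_t = train_X.unsqueeze(0).expand(torch.Size([o] + [-1] * train_X.dim()))
assert Y_t.shape == (2, 5) and X_t.shape == (2, 5, 3)

# Single-output branch: an `n x 1` train_Y is viewed down to `n`.
y = torch.randn(n, 1).view(-1)
assert y.shape == (5,)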
Example 12: last_dim_softmax
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Optional

import torch
def last_dim_softmax(tensor: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    """
    Takes a tensor with 3 or more dimensions and does a masked softmax over the last dimension.
    We assume the tensor has shape ``(batch_size, ..., sequence_length)`` and that the mask (if
    given) has shape ``(batch_size, sequence_length)``. We first unsqueeze and expand the mask
    so that it has the same shape as the tensor, then flatten them both to be 2D, pass them
    through :func:`masked_softmax`, then put the tensor back in its original shape.
    """
    tensor_shape = tensor.size()
    reshaped_tensor = tensor.view(-1, tensor.size()[-1])
    if mask is not None:
        while mask.dim() < tensor.dim():
            mask = mask.unsqueeze(1)
        mask = mask.expand_as(tensor).contiguous().float()
        mask = mask.view(-1, mask.size()[-1])
    reshaped_result = masked_softmax(reshaped_tensor, mask)
    return reshaped_result.view(*tensor_shape)
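The reshaping contract, replayed with torch.softmax standing in for masked_softmax (an assumed simplification to keep the sketch self-contained and runnable):

import torch

tensor = torch.randn(2, 3, 4)                  # (batch_size, ..., sequence_length)
tensor_shape = tensor.size()

reshaped = tensor.view(-1, tensor.size()[-1])  # flatten to (6, 4)
result = torch.softmax(reshaped, dim=-1)       # stand-in for masked_softmax
result = result.view(*tensor_shape)            # restore (2, 3, 4)

# Every last-dimension slice is now a distribution summing to 1.
assert torch.allclose(result.sum(-1), torch.ones(2, 3))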
Example 13: _last_dimension_applicator
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
from typing import Callable, Optional

import torch
def _last_dimension_applicator(function_to_apply: Callable[[torch.Tensor, Optional[torch.Tensor]], torch.Tensor],
                               tensor: torch.Tensor,
                               mask: Optional[torch.Tensor] = None):
    """
    Takes a tensor with 3 or more dimensions and applies a function over the last dimension.
    We assume the tensor has shape ``(batch_size, ..., sequence_length)`` and that the mask (if
    given) has shape ``(batch_size, sequence_length)``. We first unsqueeze and expand the mask
    so that it has the same shape as the tensor, then flatten them both to be 2D, pass them
    through the function and put the tensor back in its original shape.
    """
    tensor_shape = tensor.size()
    reshaped_tensor = tensor.view(-1, tensor.size()[-1])
    if mask is not None:
        while mask.dim() < tensor.dim():
            mask = mask.unsqueeze(1)
        mask = mask.expand_as(tensor).contiguous().float()
        mask = mask.view(-1, mask.size()[-1])
    reshaped_result = function_to_apply(reshaped_tensor, mask)
    return reshaped_result.view(*tensor_shape)
Example 14: forward
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
import torch
def forward(
    self,
    words: torch.Tensor,
):
    self.word_to_char = self.word_to_char.type_as(words)
    flat_words = words.view(-1)
    word_embs = self._convolve(self.word_to_char[flat_words])
    pads = flat_words.eq(self.vocab.pad())
    if pads.any():
        word_embs[pads] = 0
    eos = flat_words.eq(self.vocab.eos())
    if eos.any():
        word_embs[eos] = self.symbol_embeddings[self.eos_idx]
    unk = flat_words.eq(self.vocab.unk())
    if unk.any():
        word_embs[unk] = self.symbol_embeddings[self.unk_idx]
    return word_embs.view(words.size() + (-1,))
Example 15: forward
# Required import: from torch import Tensor
# Method used: torch.Tensor.view
import logging
from typing import Dict, List, Union

import torch
from allennlp.nn.util import remove_sentence_boundaries

logger = logging.getLogger(__name__)
def forward(self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            word_inputs: torch.Tensor = None) -> Dict[str, Union[torch.Tensor, List[torch.Tensor]]]:
    """
    Parameters
    ----------
    inputs : ``torch.Tensor``, required.
        Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.
    word_inputs : ``torch.Tensor``, optional (default = None).
        If you passed a cached vocab, you can in addition pass a tensor of shape
        ``(batch_size, timesteps)``, which represents word ids that have been pre-cached.

    Returns
    -------
    Dict with keys:

    ``'elmo_representations'`` : ``List[torch.Tensor]``
        A ``num_output_representations`` list of ELMo representations for the input sequence.
        Each representation is shape ``(batch_size, timesteps, embedding_dim)``
    ``'mask'`` : ``torch.Tensor``
        Shape ``(batch_size, timesteps)`` long tensor with sequence mask.
    """
    # reshape the input if needed
    original_shape = inputs.size()
    if len(original_shape) > 3:
        timesteps, num_characters = original_shape[-2:]
        reshaped_inputs = inputs.view(-1, timesteps, num_characters)
    else:
        reshaped_inputs = inputs
    if word_inputs is not None:
        original_word_size = word_inputs.size()
        if self._has_cached_vocab and len(original_word_size) > 2:
            reshaped_word_inputs = word_inputs.view(-1, original_word_size[-1])
        elif not self._has_cached_vocab:
            logger.warning("Word inputs were passed to ELMo but it does not have a cached vocab.")
            reshaped_word_inputs = None
        else:
            reshaped_word_inputs = word_inputs
    else:
        reshaped_word_inputs = word_inputs
    # run the biLM
    bilm_output = self._elmo_lstm(reshaped_inputs, reshaped_word_inputs)
    layer_activations = bilm_output['activations']
    mask_with_bos_eos = bilm_output['mask']
    # compute the elmo representations
    representations = []
    for i in range(len(self._scalar_mixes)):
        scalar_mix = getattr(self, 'scalar_mix_{}'.format(i))
        representation_with_bos_eos = scalar_mix(layer_activations, mask_with_bos_eos)
        representation_without_bos_eos, mask_without_bos_eos = remove_sentence_boundaries(
            representation_with_bos_eos, mask_with_bos_eos
        )
        representations.append(self._dropout(representation_without_bos_eos))
    # reshape if necessary
    if word_inputs is not None and len(original_word_size) > 2:
        mask = mask_without_bos_eos.view(original_word_size)
        elmo_representations = [representation.view(original_word_size + (-1, ))
                                for representation in representations]
    elif len(original_shape) > 3:
        mask = mask_without_bos_eos.view(original_shape[:-1])
        elmo_representations = [representation.view(original_shape[:-1] + (-1, ))
                                for representation in representations]
    else:
        mask = mask_without_bos_eos
        elmo_representations = representations
    return {'elmo_representations': elmo_representations, 'mask': mask}