This article collects typical usage examples of the Python method fairseq.utils.get_activation_fn. If you are unsure what utils.get_activation_fn does or how to call it, the curated code samples below may help. You can also explore further usage examples in the module fairseq.utils, where this method is defined.
Below are 9 code examples of utils.get_activation_fn, sorted by popularity by default.
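Before the examples, here is a minimal sketch of calling get_activation_fn on its own (assuming fairseq and PyTorch are installed; the activation names and tensor shape below are only illustrative):

import torch
from fairseq import utils

# get_activation_fn maps an activation name to a callable (e.g. F.relu, or fairseq's gelu)
relu_fn = utils.get_activation_fn(activation="relu")
gelu_fn = utils.get_activation_fn(activation="gelu")

x = torch.randn(2, 4)    # arbitrary example tensor
y_relu = relu_fn(x)      # same shape as x, negatives zeroed
y_gelu = gelu_fn(x)      # same shape as x, GELU applied element-wise

An unsupported activation name raises an error, which is why the examples below typically fall back to "relu" via getattr(args, "activation_fn", "relu").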
Example 1: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(self, args):
    super().__init__()
    self.embed_dim = args.encoder_embed_dim
    self.quant_noise = getattr(args, "quant_noise_pq", 0)
    self.quant_noise_block_size = getattr(args, "quant_noise_pq_block_size", 8)
    self.self_attn = self.build_self_attention(self.embed_dim, args)
    self.self_attn_layer_norm = LayerNorm(self.embed_dim)
    self.dropout = args.dropout
    self.activation_fn = utils.get_activation_fn(
        activation=getattr(args, "activation_fn", "relu")
    )
    self.activation_dropout = getattr(args, "activation_dropout", 0)
    if self.activation_dropout == 0:
        # for backwards compatibility with models that use args.relu_dropout
        self.activation_dropout = getattr(args, "relu_dropout", 0)
    self.normalize_before = args.encoder_normalize_before
    self.fc1 = self.build_fc1(
        self.embed_dim, args.encoder_ffn_embed_dim, self.quant_noise, self.quant_noise_block_size
    )
    self.fc2 = self.build_fc2(
        args.encoder_ffn_embed_dim, self.embed_dim, self.quant_noise, self.quant_noise_block_size
    )
    self.final_layer_norm = LayerNorm(self.embed_dim)
Example 2: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(self, embed_dim, output_dim, activation_fn, weight=None):
    super().__init__()
    self.dense = nn.Linear(embed_dim, embed_dim)
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.layer_norm = LayerNorm(embed_dim)
    if weight is None:
        weight = nn.Linear(embed_dim, output_dim, bias=False).weight
    self.weight = weight
    self.bias = nn.Parameter(torch.zeros(output_dim))
Example 3: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(self, embed_dim, output_dim, activation_fn, weight=None):
    super().__init__()
    self.dense = ColumnParallelLinear(embed_dim, embed_dim, gather_output=True)
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.layer_norm = LayerNorm(embed_dim)
    if weight is None:
        weight = nn.Linear(embed_dim, output_dim, bias=False).weight
    self.weight = weight
    self.bias = nn.Parameter(torch.zeros(output_dim))
Example 4: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(
    self,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    activation_fn: str = 'relu',
    add_bias_kv: bool = False,
    add_zero_attn: bool = False,
    export: bool = False,
    use_residual: bool = True,
    use_norm: bool = True,
) -> None:
    super().__init__()
    self.use_residual = use_residual
    self.use_norm = use_norm
    # Initialize parameters
    self.embedding_dim = embedding_dim
    self.dropout = dropout
    self.activation_dropout = activation_dropout
    # Initialize blocks
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.self_attn = MultiheadAttention(
        self.embedding_dim,
        num_attention_heads,
        dropout=attention_dropout,
        add_bias_kv=add_bias_kv,
        add_zero_attn=add_zero_attn,
        self_attention=True
    )
    # layer norm associated with the self-attention layer
    self.self_attn_layer_norm = LayerNorm(self.embedding_dim, export=export)
    self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim)
    self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim)
    # layer norm associated with the position-wise feed-forward network
    self.final_layer_norm = LayerNorm(self.embedding_dim, export=export)
    self.apply(init_bert_params)
Example 5: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(
    self,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    activation_fn: str = 'relu',
    export: bool = False,
    q_noise: float = 0.0,
    qn_block_size: int = 8,
) -> None:
    super().__init__()
    # Initialize parameters
    self.embedding_dim = embedding_dim
    self.dropout = dropout
    self.activation_dropout = activation_dropout
    # Initialize blocks
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.self_attn = self.build_self_attention(
        self.embedding_dim,
        num_attention_heads,
        dropout=attention_dropout,
        self_attention=True,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )
    # layer norm associated with the self-attention layer
    self.self_attn_layer_norm = LayerNorm(self.embedding_dim, export=export)
    self.fc1 = self.build_fc1(
        self.embedding_dim,
        ffn_embedding_dim,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )
    self.fc2 = self.build_fc2(
        ffn_embedding_dim,
        self.embedding_dim,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )
    # layer norm associated with the position-wise feed-forward network
    self.final_layer_norm = LayerNorm(self.embedding_dim, export=export)
Example 6: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(self, args, dictionary):
    super().__init__(dictionary)
    self.padding_idx = dictionary.pad()
    self.vocab_size = len(dictionary)
    self.max_positions = args.max_positions
    self.sentence_encoder = TransformerSentenceEncoder(
        padding_idx=self.padding_idx,
        vocab_size=self.vocab_size,
        num_encoder_layers=args.encoder_layers,
        embedding_dim=args.encoder_embed_dim,
        ffn_embedding_dim=args.encoder_ffn_embed_dim,
        num_attention_heads=args.encoder_attention_heads,
        dropout=args.dropout,
        attention_dropout=args.attention_dropout,
        activation_dropout=args.act_dropout,
        max_seq_len=self.max_positions,
        num_segments=args.num_segment,
        use_position_embeddings=not args.no_token_positional_embeddings,
        encoder_normalize_before=args.encoder_normalize_before,
        apply_bert_init=args.apply_bert_init,
        activation_fn=args.activation_fn,
        learned_pos_embedding=args.encoder_learned_pos,
    )
    self.share_input_output_embed = args.share_encoder_input_output_embed
    self.embed_out = None
    self.sentence_projection_layer = None
    self.sentence_out_dim = args.sentence_class_num
    self.lm_output_learned_bias = None
    # remove_head is set to True during fine-tuning
    self.load_softmax = not getattr(args, 'remove_head', False)
    self.masked_lm_pooler = nn.Linear(
        args.encoder_embed_dim, args.encoder_embed_dim
    )
    self.pooler_activation = utils.get_activation_fn(args.pooler_activation_fn)
    self.lm_head_transform_weight = nn.Linear(args.encoder_embed_dim, args.encoder_embed_dim)
    self.activation_fn = utils.get_activation_fn(args.activation_fn)
    self.layer_norm = LayerNorm(args.encoder_embed_dim)
    self.lm_output_learned_bias = None
    if self.load_softmax:
        self.lm_output_learned_bias = nn.Parameter(torch.zeros(self.vocab_size))
        if not self.share_input_output_embed:
            self.embed_out = nn.Linear(
                args.encoder_embed_dim,
                self.vocab_size,
                bias=False
            )
        if args.sent_loss:
            self.sentence_projection_layer = nn.Linear(
                args.encoder_embed_dim,
                self.sentence_out_dim,
                bias=False
            )
Example 7: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(self, args):
    super().__init__()
    self.embed_dim = args.decoder_embed_dim
    self.cross_self_attention = getattr(args, "cross_self_attention", False)
    self.avg_attn = AverageAttention(self.embed_dim, dropout=args.attention_dropout)
    # unlike the original paper, we use a single gate
    self.aan_gating_fc = fairseq_transformer.Linear(
        self.embed_dim * 2, self.embed_dim
    )
    self.dropout = args.dropout
    self.activation_fn = utils.get_activation_fn(
        activation=getattr(args, "activation_fn", "relu")
    )
    self.activation_dropout = getattr(args, "activation_dropout", 0)
    if self.activation_dropout == 0:
        # for backwards compatibility with models that use args.relu_dropout
        self.activation_dropout = getattr(args, "relu_dropout", 0)
    self.normalize_before = args.decoder_normalize_before
    # use LayerNorm rather than FusedLayerNorm for exporting;
    # char_inputs can be used to determine this.
    # TODO: remove this once we update apex with the fix
    export = getattr(args, "char_inputs", False)
    self.avg_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
    self.encoder_attn = MultiheadAttention(
        self.embed_dim,
        args.decoder_attention_heads,
        kdim=getattr(args, "encoder_embed_dim", None),
        vdim=getattr(args, "encoder_embed_dim", None),
        dropout=args.attention_dropout,
        encoder_decoder_attention=True,
    )
    self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
    self.fc1 = fairseq_transformer.Linear(
        self.embed_dim, args.decoder_ffn_embed_dim
    )
    self.fc2 = fairseq_transformer.Linear(
        args.decoder_ffn_embed_dim, self.embed_dim
    )
    self.final_layer_norm = LayerNorm(self.embed_dim, export=export)
    self.need_attn = True
    self.onnx_trace = False
Example 8: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(
    self,
    args,
    no_encoder_decoder_attn=False,
    add_bias_kv=False,
    add_zero_attn=False,
):
    super().__init__()
    self.embed_dim = args.decoder_embed_dim
    self.self_attn = MultiheadAttention(
        embed_dim=self.embed_dim,
        num_heads=args.decoder_attention_heads,
        dropout=args.attention_dropout,
        add_bias_kv=add_bias_kv,
        add_zero_attn=add_zero_attn,
        self_attention=True,
    )
    self.dropout = args.dropout
    self.activation_fn = utils.get_activation_fn(
        activation=getattr(args, "activation_fn", "relu")
    )
    self.activation_dropout = getattr(args, "activation_dropout", 0)
    if self.activation_dropout == 0:
        # for backwards compatibility with models that use args.relu_dropout
        self.activation_dropout = getattr(args, "relu_dropout", 0)
    self.normalize_before = args.decoder_normalize_before
    # use LayerNorm rather than FusedLayerNorm for exporting;
    # char_inputs can be used to determine this.
    # TODO: remove this once we update apex with the fix
    export = getattr(args, "char_inputs", False)
    self.self_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
    if no_encoder_decoder_attn:
        self.encoder_attn = None
        self.decoder_attn = None
        self.encoder_layer_norm = None
        self.decoder_layer_norm = None
    else:
        self.encoder_attn = MultiheadAttention(
            self.embed_dim,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            encoder_decoder_attention=True,
        )
        self.decoder_attn = MultiheadAttention(
            self.embed_dim,
            args.decoder_attention_heads,
            dropout=args.attention_dropout,
            encoder_decoder_attention=True,
        )
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
        self.decoder_attn_layer_norm = LayerNorm(self.embed_dim, export=export)
    self.fc1 = Linear(self.embed_dim, args.decoder_ffn_embed_dim)
    self.fc2 = Linear(args.decoder_ffn_embed_dim, self.embed_dim)
    self.final_layer_norm = LayerNorm(self.embed_dim, export=export)
    self.need_attn = True
    self.onnx_trace = False
Example 9: __init__
# Required import: from fairseq import utils  [as alias]
# Or: from fairseq.utils import get_activation_fn  [as alias]
def __init__(
    self,
    embedding_dim: int = 768,
    ffn_embedding_dim: int = 3072,
    num_attention_heads: int = 8,
    dropout: float = 0.1,
    attention_dropout: float = 0.1,
    activation_dropout: float = 0.1,
    activation_fn: str = 'relu',
    export: bool = False,
    q_noise: float = 0.0,
    qn_block_size: int = 8,
) -> None:
    super().__init__()
    # Initialize parameters
    self.embedding_dim = embedding_dim
    self.dropout = dropout
    self.activation_dropout = activation_dropout
    # Initialize blocks
    self.activation_fn = utils.get_activation_fn(activation_fn)
    self.self_attn = MultiheadAttention(
        self.embedding_dim,
        num_attention_heads,
        dropout=attention_dropout,
        add_bias_kv=False,
        add_zero_attn=False,
        self_attention=True,
        q_noise=q_noise,
        qn_block_size=qn_block_size,
    )
    # layer norm associated with the self-attention layer
    self.self_attn_layer_norm = LayerNorm(self.embedding_dim, export=export)
    self.fc1 = quant_noise(
        nn.Linear(self.embedding_dim, ffn_embedding_dim), q_noise, qn_block_size
    )
    self.fc2 = quant_noise(
        nn.Linear(ffn_embedding_dim, self.embedding_dim), q_noise, qn_block_size
    )
    # layer norm associated with the position-wise feed-forward network
    self.final_layer_norm = LayerNorm(self.embedding_dim, export=export)