本文整理汇总了Python中fairseq.utils.parse_embedding方法的典型用法代码示例。如果您正苦于以下问题:Python utils.parse_embedding方法的具体用法?Python utils.parse_embedding怎么用?Python utils.parse_embedding使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类fairseq.utils
的用法示例。
在下文中一共展示了utils.parse_embedding方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_embedding
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def load_embedding(embedding, dictionary, pretrained_embed):
"""Loads pretrained embeddings.
Loads pretrained embeddings into a nn.Embedding layer. pretrained_embed
can either be a nn.Embedding layer, in which case the embedding is set
to the pretrained_embed argument, or a path to an embedding file.
Arguments:
embedding (pytorch_translate.common_layers.Embedding):
Embedding layer whose weights are to be set.
dictionary (fairseq.data.dictionary.Dictionary): dictionary with the
same vocabulary size as the embedding argument.
pretrained_embed (Union(string, nn.Embedding)): source of the
weights to be loaded.
"""
if pretrained_embed is None:
return
if isinstance(pretrained_embed, torch.nn.Embedding):
embedding.weight = pretrained_embed.weight
else:
embed_dict = utils.parse_embedding(pretrained_embed)
utils.load_embedding(embed_dict, dictionary, embedding)
embedding.init_normalization_if_needed()
示例2: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure that all args are properly defaulted (in case there are any new ones)
base_architecture(args)
encoder_embed_dict = None
if args.encoder_embed_path:
encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)
decoder_embed_dict = None
if args.decoder_embed_path:
decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)
encoder = FConvEncoder(
dictionary=task.source_dictionary,
embed_dim=args.encoder_embed_dim,
embed_dict=encoder_embed_dict,
convolutions=eval(args.encoder_layers),
dropout=args.dropout,
max_positions=args.max_source_positions,
normalization_constant=args.normalization_constant,
)
decoder = FConvDecoder(
dictionary=task.target_dictionary,
embed_dim=args.decoder_embed_dim,
embed_dict=decoder_embed_dict,
convolutions=eval(args.decoder_layers),
out_embed_dim=args.decoder_out_embed_dim,
attention=eval(args.decoder_attention),
dropout=args.dropout,
max_positions=args.max_target_positions,
share_embed=args.share_input_output_embed,
normalization_constant=args.normalization_constant,
)
return FConvModel(encoder, decoder)
示例3: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure that all args are properly defaulted (in case there are any new ones)
base_architecture(args)
encoder_embed_dict = None
if args.encoder_embed_path:
encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)
decoder_embed_dict = None
if args.decoder_embed_path:
decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)
encoder = FConvCustomEncoder(
dictionary=task.source_dictionary,
embed_dim=args.encoder_embed_dim,
embed_dict=encoder_embed_dict,
convolutions=eval(args.encoder_layers),
dropout=args.dropout,
max_positions=args.max_source_positions,
normalization_constant=args.normalization_constant,
token_dropout=args.source_token_dropout,
)
decoder = FConvCustomDecoder(
dictionary=task.target_dictionary,
embed_dim=args.decoder_embed_dim,
embed_dict=decoder_embed_dict,
convolutions=eval(args.decoder_layers),
out_embed_dim=args.decoder_out_embed_dim,
attention=eval(args.decoder_attention),
dropout=args.dropout,
max_positions=args.max_target_positions,
share_embed=args.share_input_output_embed,
normalization_constant=args.normalization_constant,
)
return FConvCustomModel(encoder, decoder)
示例4: build_embedding
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_embedding(cls, args, dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
示例5: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure that all args are properly defaulted (in case there are any new ones)
base_architecture(args)
encoder_embed_dict = None
if args.encoder_embed_path:
encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)
decoder_embed_dict = None
if args.decoder_embed_path:
decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)
encoder = FConvEncoder(
dictionary=task.source_dictionary,
embed_dim=args.encoder_embed_dim,
embed_dict=encoder_embed_dict,
convolutions=eval(args.encoder_layers),
dropout=args.dropout,
max_positions=args.max_source_positions,
)
decoder = FConvDecoder(
dictionary=task.target_dictionary,
embed_dim=args.decoder_embed_dim,
embed_dict=decoder_embed_dict,
convolutions=eval(args.decoder_layers),
out_embed_dim=args.decoder_out_embed_dim,
attention=eval(args.decoder_attention),
dropout=args.dropout,
max_positions=args.max_target_positions,
share_embed=args.share_input_output_embed,
)
return FConvModel(encoder, decoder)
示例6: build_embedding
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_embedding(dictionary, embed_dim, path=None, freeze=False):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = TransformerTokenEmbedding(num_embeddings, embed_dim, padding_idx, freeze)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
示例7: build_embedding
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
示例8: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
""" Build a new model instance. """
base_architecture(args)
if not hasattr(args, 'max_source_positions'):
args.max_source_positions = 1024
if not hasattr(args, 'max_target_positions'):
args.max_target_positions = 1024
src_dict, tgt_dict = task.source_dictionary, task.target_dictionary
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
# NOT sharing encoder-decoder embeddings
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
ctrl_encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
ctrl_decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
encoder = Attn2dEncoder(args, src_dict, encoder_embed_tokens, ctrl_encoder_embed_tokens)
decoder = Attn2dDecoder(args, tgt_dict, decoder_embed_tokens, ctrl_decoder_embed_tokens)
return cls(encoder, decoder)
示例9: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure that all args are properly defaulted (in case there are any new ones)
base_architecture(args)
encoder_embed_dict = None
if args.encoder_embed_path:
encoder_embed_dict = utils.parse_embedding(args.encoder_embed_path)
utils.print_embed_overlap(encoder_embed_dict, task.source_dictionary)
auxencoder_embed_dict = None
if args.auxencoder_embed_path:
auxencoder_embed_dict = utils.parse_embedding(args.auxencoder_embed_path)
utils.print_embed_overlap(auxencoder_embed_dict, task.context_dictionary)
decoder_embed_dict = None
if args.decoder_embed_path:
decoder_embed_dict = utils.parse_embedding(args.decoder_embed_path)
utils.print_embed_overlap(decoder_embed_dict, task.target_dictionary)
auxencoder = FConvCustomEncoder(
dictionary=task.context_dictionary,
embed_dim=args.auxencoder_embed_dim,
embed_dict=auxencoder_embed_dict,
convolutions=eval(args.auxencoder_layers),
dropout=args.dropout,
max_positions=args.max_context_positions,
normalization_constant=args.normalization_constant,
)
encoder = FConvCustomEncoder(
dictionary=task.source_dictionary,
embed_dim=args.encoder_embed_dim,
embed_dict=encoder_embed_dict,
convolutions=eval(args.encoder_layers),
dropout=args.dropout,
max_positions=args.max_source_positions,
normalization_constant=args.normalization_constant,
token_dropout=args.source_token_dropout,
)
decoder = FConvCustomDecoder(
dictionary=task.target_dictionary,
embed_dim=args.decoder_embed_dim,
embed_dict=decoder_embed_dict,
convolutions=eval(args.decoder_layers),
out_embed_dim=args.decoder_out_embed_dim,
attention=eval(args.decoder_attention),
dropout=args.dropout,
max_positions=args.max_target_positions,
share_embed=args.share_input_output_embed,
normalization_constant=args.normalization_constant,
)
return FConvDualEncoderModel(auxencoder, encoder, decoder)
示例10: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure that all args are properly defaulted (in case there are any new ones)
base_architecture(args)
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
embed_dict = utils.parse_embedding(embed_path)
utils.print_embed_overlap(embed_dict, dictionary)
return utils.load_embedding(embed_dict, dictionary, embed_tokens)
pretrained_encoder_embed = None
if args.encoder_embed_path:
pretrained_encoder_embed = load_pretrained_embedding_from_file(
args.encoder_embed_path, task.source_dictionary, args.encoder_embed_dim)
pretrained_decoder_embed = None
if args.decoder_embed_path:
pretrained_decoder_embed = load_pretrained_embedding_from_file(
args.decoder_embed_path, task.target_dictionary, args.decoder_embed_dim)
encoder = LSTMEncoder(
dictionary=task.source_dictionary,
embed_dim=args.encoder_embed_dim,
hidden_size=args.encoder_hidden_size,
num_layers=args.encoder_layers,
dropout_in=args.encoder_dropout_in,
dropout_out=args.encoder_dropout_out,
bidirectional=args.encoder_bidirectional,
pretrained_embed=pretrained_encoder_embed,
)
decoder = LSTMDecoder(
dictionary=task.target_dictionary,
embed_dim=args.decoder_embed_dim,
hidden_size=args.decoder_hidden_size,
out_embed_dim=args.decoder_out_embed_dim,
num_layers=args.decoder_layers,
dropout_in=args.decoder_dropout_in,
dropout_out=args.decoder_dropout_out,
attention=options.eval_bool(args.decoder_attention),
encoder_embed_dim=args.encoder_embed_dim,
encoder_output_units=encoder.output_units,
pretrained_embed=pretrained_decoder_embed,
)
return cls(encoder, decoder)
示例11: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure all arguments are present in older models
base_architecture(args)
if getattr(args, 'max_target_positions', None) is not None:
max_target_positions = args.max_target_positions
else:
max_target_positions = getattr(args, 'tokens_per_sample', DEFAULT_MAX_TARGET_POSITIONS)
def load_pretrained_embedding_from_file(embed_path, dictionary, embed_dim):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
embed_dict = utils.parse_embedding(embed_path)
utils.print_embed_overlap(embed_dict, dictionary)
return utils.load_embedding(embed_dict, dictionary, embed_tokens)
pretrained_decoder_embed = None
if args.decoder_embed_path:
pretrained_decoder_embed = load_pretrained_embedding_from_file(
args.decoder_embed_path,
task.target_dictionary,
args.decoder_embed_dim
)
if args.share_decoder_input_output_embed:
# double check all parameters combinations are valid
if task.source_dictionary != task.target_dictionary:
raise ValueError('--share-decoder-input-output-embeddings requires a joint dictionary')
if args.decoder_embed_dim != args.decoder_out_embed_dim:
raise ValueError(
'--share-decoder-input-output-embeddings requires '
'--decoder-embed-dim to match --decoder-out-embed-dim'
)
decoder = LSTMDecoder(
dictionary=task.dictionary,
embed_dim=args.decoder_embed_dim,
hidden_size=args.decoder_hidden_size,
out_embed_dim=args.decoder_out_embed_dim,
num_layers=args.decoder_layers,
dropout_in=args.decoder_dropout_in,
dropout_out=args.decoder_dropout_out,
attention=False, # decoder-only language model doesn't support attention
encoder_output_units=0,
pretrained_embed=pretrained_decoder_embed,
share_input_output_embed=args.share_decoder_input_output_embed,
adaptive_softmax_cutoff=(
options.eval_str_list(args.adaptive_softmax_cutoff, type=int)
if args.criterion == 'adaptive_loss' else None
),
max_target_positions=max_target_positions,
residuals=args.residuals
)
return cls(decoder)
示例12: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure all arguments are present in older models
base_architecture(args)
if not hasattr(args, 'max_source_positions'):
args.max_source_positions = 1024
if not hasattr(args, 'max_target_positions'):
args.max_target_positions = 1024
src_dict, tgt_dict = task.source_dictionary, task.target_dictionary
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
if args.share_all_embeddings:
if src_dict != tgt_dict:
raise RuntimeError('--share-all-embeddings requires a joined dictionary')
if args.encoder_embed_dim != args.decoder_embed_dim:
raise RuntimeError(
'--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim')
if args.decoder_embed_path and (
args.decoder_embed_path != args.encoder_embed_path):
raise RuntimeError('--share-all-embeddings not compatible with --decoder-embed-path')
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = encoder_embed_tokens
args.share_decoder_input_output_embed = True
else:
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
encoder = LightConvEncoder(args, src_dict, encoder_embed_tokens)
decoder = LightConvDecoder(args, tgt_dict, decoder_embed_tokens)
return LightConvModel(encoder, decoder)
示例13: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure all arguments are present in older models
base_architecture(args)
if not hasattr(args, "max_source_positions"):
args.max_source_positions = DEFAULT_MAX_SOURCE_POSITIONS
if not hasattr(args, "max_target_positions"):
args.max_target_positions = DEFAULT_MAX_TARGET_POSITIONS
src_dict, tgt_dict = task.source_dictionary, task.target_dictionary
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
if args.share_all_embeddings:
if src_dict != tgt_dict:
raise ValueError("--share-all-embeddings requires a joined dictionary")
if args.encoder_embed_dim != args.decoder_embed_dim:
raise ValueError(
"--share-all-embeddings requires --encoder-embed-dim "
"to match --decoder-embed-dim"
)
if args.decoder_embed_path and (
args.decoder_embed_path != args.encoder_embed_path
):
raise ValueError(
"--share-all-embeddings not compatible with --decoder-embed-path"
)
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = encoder_embed_tokens
args.share_decoder_input_output_embed = True
else:
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
encoder = cls.build_encoder(args, src_dict, encoder_embed_tokens)
decoder = cls.build_decoder(args, tgt_dict, decoder_embed_tokens)
return TwoPhaseTransformerModel(encoder, decoder)
示例14: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure all arguments are present in older models
base_architecture(args)
if not hasattr(args, "max_source_positions"):
args.max_source_positions = 1024
if not hasattr(args, "max_target_positions"):
args.max_target_positions = 1024
src_dict, tgt_dict = task.source_dictionary, task.target_dictionary
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
if args.share_all_embeddings:
if src_dict != tgt_dict:
raise RuntimeError(
"--share-all-embeddings requires a joined dictionary"
)
if args.encoder_embed_dim != args.decoder_embed_dim:
raise RuntimeError(
"""--share-all-embeddings requires --encoder-embed-dim \
to match --decoder-embed-dim"""
)
if args.decoder_embed_path and (
args.decoder_embed_path != args.encoder_embed_path
):
raise RuntimeError(
"--share-all-embeddings not compatible with --decoder-embed-path"
)
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = encoder_embed_tokens
args.share_decoder_input_output_embed = True
else:
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
encoder = pytorch_translate_transformer.TransformerEncoder(
args, src_dict, encoder_embed_tokens
)
decoder = TransformerAANDecoder(args, src_dict, tgt_dict, decoder_embed_tokens)
return TransformerAANModel(task, encoder, decoder)
示例15: build_model
# 需要导入模块: from fairseq import utils [as 别名]
# 或者: from fairseq.utils import parse_embedding [as 别名]
def build_model(cls, args, task):
"""Build a new model instance."""
# make sure all arguments are present in older models
base_architecture(args)
if not hasattr(args, 'max_source_positions'):
args.max_source_positions = 1024
if not hasattr(args, 'max_target_positions'):
args.max_target_positions = 1024
transformer_print(key=mlperf_log.INPUT_MAX_LENGTH, value=args.max_source_positions)
src_dict, tgt_dict = task.source_dictionary, task.target_dictionary
def build_embedding(dictionary, embed_dim, path=None):
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
emb = Embedding(num_embeddings, embed_dim, padding_idx)
# if provided, load from preloaded dictionaries
if path:
embed_dict = utils.parse_embedding(path)
utils.load_embedding(embed_dict, dictionary, emb)
return emb
if args.share_all_embeddings:
if src_dict != tgt_dict:
raise RuntimeError('--share-all-embeddings requires a joined dictionary')
if args.encoder_embed_dim != args.decoder_embed_dim:
raise RuntimeError(
'--share-all-embeddings requires --encoder-embed-dim to match --decoder-embed-dim')
if args.decoder_embed_path and (
args.decoder_embed_path != args.encoder_embed_path):
raise RuntimeError('--share-all-embeddings not compatible with --decoder-embed-path')
transformer_print(key=mlperf_log.MODEL_HP_EMBEDDING_SHARED_WEIGHTS,
value={'hidden_size':args.encoder_embed_dim, 'vocab_size':len(src_dict)})
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = encoder_embed_tokens
args.share_decoder_input_output_embed = True
else:
encoder_embed_tokens = build_embedding(
src_dict, args.encoder_embed_dim, args.encoder_embed_path
)
decoder_embed_tokens = build_embedding(
tgt_dict, args.decoder_embed_dim, args.decoder_embed_path
)
encoder = TransformerEncoder(args, src_dict, encoder_embed_tokens)
decoder = TransformerDecoder(args, tgt_dict, decoder_embed_tokens)
return TransformerModel(encoder, decoder)