當前位置: 首頁>>代碼示例>>Python>>正文


Python onmt.inputters方法代碼示例

本文整理匯總了Python中onmt.inputters方法的典型用法代碼示例。如果您正苦於以下問題:Python onmt.inputters方法的具體用法?Python onmt.inputters怎麽用?Python onmt.inputters使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在onmt的用法示例。


在下文中一共展示了onmt.inputters方法的11個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: dataset_build

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def dataset_build(self, opt):
        """Run the preprocessing steps end to end, then delete their output.

        Writes a small fixed vocabulary to ``opt.src_vocab`` and
        ``opt.tgt_vocab`` when those options hold a non-empty path, builds
        and saves the 'train' dataset, the vocabulary and the 'valid'
        dataset, and finally removes the generated ``*.pt`` files and the
        vocabulary files.

        Args:
            opt: options object forwarded to the ``preprocess`` helpers.
                ``src_vocab`` / ``tgt_vocab`` are optional path attributes.
        """
        fields = onmt.inputters.get_fields("text", 0, 0)
        vocab_paths = [getattr(opt, attr, None)
                       for attr in ('src_vocab', 'tgt_vocab')]

        try:
            # Seed every configured vocabulary file with six one-letter tokens.
            for path in vocab_paths:
                if path:
                    with codecs.open(path, 'w', 'utf-8') as f:
                        f.write('a\nb\nc\nd\ne\nf\n')

            train_data_files = preprocess.build_save_dataset(
                'train', fields, opt)

            preprocess.build_save_vocab(train_data_files, fields, opt)

            preprocess.build_save_dataset('valid', fields, opt)
        finally:
            # Clean up even when a build step raises, so a failed run does
            # not leave stale files behind for the next one.
            for pt in glob.glob(SAVE_DATA_PREFIX + '*.pt'):
                os.remove(pt)
            for path in vocab_paths:
                if path and os.path.exists(path):
                    os.remove(path)
開發者ID:lizekang,項目名稱:ITDD,代碼行數:25,代碼來源:test_preprocess.py

示例2: dataset_build

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def dataset_build(self, opt):
        """Build and save the train/valid datasets, then delete the output.

        Writes a small fixed vocabulary to ``opt.src_vocab`` and
        ``opt.tgt_vocab`` when those options hold a non-empty path, creates
        the source and target readers, saves the 'train' and 'valid'
        datasets, and finally removes the generated ``*.pt`` files and the
        vocabulary files.

        Args:
            opt: options object forwarded to the readers and the
                ``preprocess`` helpers. ``opt.data_type`` selects the source
                reader; ``src_vocab`` / ``tgt_vocab`` are optional paths.
        """
        fields = onmt.inputters.get_fields("text", 0, 0)
        vocab_paths = [getattr(opt, attr, None)
                       for attr in ('src_vocab', 'tgt_vocab')]

        try:
            # Seed every configured vocabulary file with six one-letter tokens.
            for path in vocab_paths:
                if path:
                    with codecs.open(path, 'w', 'utf-8') as f:
                        f.write('a\nb\nc\nd\ne\nf\n')

            src_reader = onmt.inputters.str2reader[opt.data_type].from_opt(opt)
            tgt_reader = onmt.inputters.str2reader["text"].from_opt(opt)
            preprocess.build_save_dataset(
                'train', fields, src_reader, tgt_reader, opt)

            preprocess.build_save_dataset(
                'valid', fields, src_reader, tgt_reader, opt)
        finally:
            # Clean up even when a build step raises, so a failed run does
            # not leave stale files behind for the next one.
            for pt in glob.glob(SAVE_DATA_PREFIX + '*.pt'):
                os.remove(pt)
            for path in vocab_paths:
                if path and os.path.exists(path):
                    os.remove(path)
開發者ID:memray,項目名稱:OpenNMT-kpg-release,代碼行數:27,代碼來源:test_preprocess.py

示例3: get_field

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def get_field(self):
        """Return the "src" field after building its vocab from an empty list."""
        all_fields = onmt.inputters.get_fields("text", 0, 0)
        src_field = all_fields["src"]
        src_field.build_vocab([])
        return src_field
開發者ID:lizekang,項目名稱:ITDD,代碼行數:6,代碼來源:test_models.py

示例4: dataset_build

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def dataset_build(self, opt):
        """Run the preprocessing steps end to end, then delete their output.

        Writes a small fixed vocabulary to ``opt.src_vocab`` and
        ``opt.tgt_vocab`` when those options hold a non-empty path, creates
        the source and target readers, saves the 'train' dataset, the
        vocabulary and the 'valid' dataset, and finally removes the generated
        ``*.pt`` files and the vocabulary files.

        Args:
            opt: options object forwarded to the readers and the
                ``preprocess`` helpers. ``opt.data_type`` selects the source
                reader; ``src_vocab`` / ``tgt_vocab`` are optional paths.
        """
        fields = onmt.inputters.get_fields("text", 0, 0)
        vocab_paths = [getattr(opt, attr, None)
                       for attr in ('src_vocab', 'tgt_vocab')]

        try:
            # Seed every configured vocabulary file with six one-letter tokens.
            for path in vocab_paths:
                if path:
                    with codecs.open(path, 'w', 'utf-8') as f:
                        f.write('a\nb\nc\nd\ne\nf\n')

            src_reader = onmt.inputters.str2reader[opt.data_type].from_opt(opt)
            tgt_reader = onmt.inputters.str2reader["text"].from_opt(opt)
            train_data_files = preprocess.build_save_dataset(
                'train', fields, src_reader, tgt_reader, opt)

            preprocess.build_save_vocab(train_data_files, fields, opt)

            preprocess.build_save_dataset(
                'valid', fields, src_reader, tgt_reader, opt)
        finally:
            # Clean up even when a build step raises, so a failed run does
            # not leave stale files behind for the next one.
            for pt in glob.glob(SAVE_DATA_PREFIX + '*.pt'):
                os.remove(pt)
            for path in vocab_paths:
                if path and os.path.exists(path):
                    os.remove(path)
開發者ID:harvardnlp,項目名稱:encoder-agnostic-adaptation,代碼行數:29,代碼來源:test_preprocess.py

示例5: get_field

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def get_field(self):
        """Return the "src" field after building its base field's vocab.

        The vocab is built on ``base_field`` from an empty list; the outer
        field object is what gets returned.
        """
        all_fields = onmt.inputters.get_fields("text", 0, 0)
        src_field = all_fields["src"]
        src_field.base_field.build_vocab([])
        return src_field
開發者ID:harvardnlp,項目名稱:encoder-agnostic-adaptation,代碼行數:6,代碼來源:test_models.py

示例6: dataset_build

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def dataset_build(self, opt):
        """Build and save the train/valid datasets, then delete the output.

        Writes a small fixed vocabulary to ``opt.src_vocab`` and
        ``opt.tgt_vocab`` when those options hold a non-empty path, creates
        the source, target and alignment readers, saves the 'train' and
        'valid' datasets, and finally removes the generated ``*.pt`` files
        and the vocabulary files.

        Args:
            opt: options object forwarded to the readers and the
                ``preprocess`` helpers. ``opt.data_type`` selects the source
                reader; ``src_vocab`` / ``tgt_vocab`` are optional paths.
        """
        fields = onmt.inputters.get_fields("text", 0, 0)
        vocab_paths = [getattr(opt, attr, None)
                       for attr in ('src_vocab', 'tgt_vocab')]

        try:
            # Seed every configured vocabulary file with six one-letter tokens.
            for path in vocab_paths:
                if path:
                    with codecs.open(path, 'w', 'utf-8') as f:
                        f.write('a\nb\nc\nd\ne\nf\n')

            src_reader = onmt.inputters.str2reader[opt.data_type].from_opt(opt)
            tgt_reader = onmt.inputters.str2reader["text"].from_opt(opt)
            align_reader = onmt.inputters.str2reader["text"].from_opt(opt)
            preprocess.build_save_dataset(
                'train', fields, src_reader, tgt_reader, align_reader, opt)

            preprocess.build_save_dataset(
                'valid', fields, src_reader, tgt_reader, align_reader, opt)
        finally:
            # Clean up even when a build step raises, so a failed run does
            # not leave stale files behind for the next one.
            for pt in glob.glob(SAVE_DATA_PREFIX + '*.pt'):
                os.remove(pt)
            for path in vocab_paths:
                if path and os.path.exists(path):
                    os.remove(path)
開發者ID:OpenNMT,項目名稱:OpenNMT-py,代碼行數:28,代碼來源:test_preprocess.py

示例7: __init__

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def __init__(self, generator, tgt_vocab):
        """Store the generator and target vocab and cache the padding index.

        Args:
            generator: object kept as ``self.generator``.
            tgt_vocab: vocabulary whose ``stoi`` mapping is used to look up
                the index of ``inputters.PAD_WORD``.
        """
        super(LossComputeBase, self).__init__()
        self.generator = generator
        self.tgt_vocab = tgt_vocab
        pad_token = inputters.PAD_WORD
        self.padding_idx = tgt_vocab.stoi[pad_token]
開發者ID:InitialBug,項目名稱:BiSET,代碼行數:7,代碼來源:loss.py

示例8: test_merge_vocab

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def test_merge_vocab(self):
        """Check merged frequencies and size of two merged vocabularies."""
        va = torchtext.vocab.Vocab(Counter('abbccc'))
        vb = torchtext.vocab.Vocab(Counter('eeabbcccf'))

        merged = onmt.inputters.merge_vocabs([va, vb], 2)

        # Frequencies of both vocabularies are summed per token.
        self.assertEqual(Counter({'c': 6, 'b': 4, 'a': 2, 'e': 2, 'f': 1}),
                         merged.freqs)
        # 4 specials + 2 words (since we pass 2 to merge_vocabs)
        self.assertEqual(6, len(merged.itos))
        # assertIn shows the container contents when the check fails.
        self.assertIn('b', merged.itos)
開發者ID:InitialBug,項目名稱:BiSET,代碼行數:13,代碼來源:test_preprocess.py

示例9: get_vocab

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def get_vocab(self):
        """Return the vocab of the "src" field, built from an empty list."""
        src_field = onmt.inputters.get_fields("text", 0, 0)["src"]
        src_field.build_vocab([])
        vocab = src_field.vocab
        return vocab
開發者ID:InitialBug,項目名稱:BiSET,代碼行數:6,代碼來源:test_models.py

示例10: main

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def main():
    """Load a checkpoint and write its source/target embeddings to text files.

    Parses the command-line options, loads the checkpoint at ``opt.model``,
    rebuilds the model and writes the encoder and decoder word-embedding
    tables to ``src_embeddings.txt`` and ``tgt_embeddings.txt`` under
    ``opt.output_dir``.
    """
    # A throw-away parser that only supplies the default model options.
    defaults_parser = argparse.ArgumentParser(description='train.py')
    onmt.opts.model_opts(defaults_parser)
    dummy_opt, _ = defaults_parser.parse_known_args([])

    opt = parser.parse_args()
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    # The map_location callable returns the storage unchanged.
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']

    # checkpoint['vocab'] is a list of (name, torchtext.Vocab) tuples;
    # pick out the source and target vocabularies by name.
    src_dict = None
    tgt_dict = None
    for entry in checkpoint['vocab']:
        name = entry[0]
        if name == 'src':
            src_dict = entry[1]
        if name == 'tgt':
            tgt_dict = entry[1]
    assert src_dict is not None and tgt_dict is not None

    fields = onmt.inputters.load_fields_from_vocab(checkpoint['vocab'])

    # Add in default model arguments, possibly added since training.
    for arg, default in vars(dummy_opt).items():
        if arg not in model_opt:
            model_opt.__dict__[arg] = default

    model = onmt.model_builder.build_base_model(
        model_opt, fields, use_gpu(opt), checkpoint)

    encoder_embeddings = \
        model.encoder.embeddings.word_lut.weight.data.tolist()
    decoder_embeddings = \
        model.decoder.embeddings.word_lut.weight.data.tolist()

    logger.info("Writing source embeddings")
    src_path = opt.output_dir + "/src_embeddings.txt"
    write_embeddings(src_path, src_dict, encoder_embeddings)

    logger.info("Writing target embeddings")
    tgt_path = opt.output_dir + "/tgt_embeddings.txt"
    write_embeddings(tgt_path, tgt_dict, decoder_embeddings)

    logger.info('... done.')
    logger.info('Converting model...')
開發者ID:lizekang,項目名稱:ITDD,代碼行數:52,代碼來源:extract_embeddings.py

示例11: main

# 需要導入模塊: import onmt [as 別名]
# 或者: from onmt import inputters [as 別名]
def main():
    """Load a checkpoint and write its source/target embeddings to text files.

    Parses the command-line options, loads the checkpoint at ``opt.model``,
    rebuilds the model and writes the encoder and decoder word-embedding
    tables to ``src_embeddings.txt`` and ``tgt_embeddings.txt`` under
    ``opt.output_dir``.
    """
    # A throw-away parser that only supplies the default model options.
    defaults_parser = argparse.ArgumentParser(description='train.py')
    onmt.opts.model_opts(defaults_parser)
    dummy_opt, _ = defaults_parser.parse_known_args([])

    opt = parser.parse_args()
    opt.cuda = opt.gpu > -1
    if opt.cuda:
        torch.cuda.set_device(opt.gpu)

    # The map_location callable returns the storage unchanged.
    checkpoint = torch.load(opt.model,
                            map_location=lambda storage, loc: storage)
    model_opt = checkpoint['opt']

    # Old-style vocab objects are converted to fields first; otherwise the
    # stored object is used as the fields directly.
    vocab = checkpoint['vocab']
    fields = (onmt.inputters.load_old_vocab(vocab)
              if inputters.old_style_vocab(vocab) else vocab)
    src_dict = fields['src'].base_field.vocab  # assumes src is text
    tgt_dict = fields['tgt'].base_field.vocab

    # Add in default model arguments, possibly added since training.
    for arg, default in vars(dummy_opt).items():
        if arg not in model_opt:
            model_opt.__dict__[arg] = default

    model = onmt.model_builder.build_base_model(
        model_opt, fields, use_gpu(opt), checkpoint)

    encoder_embeddings = \
        model.encoder.embeddings.word_lut.weight.data.tolist()
    decoder_embeddings = \
        model.decoder.embeddings.word_lut.weight.data.tolist()

    logger.info("Writing source embeddings")
    src_path = opt.output_dir + "/src_embeddings.txt"
    write_embeddings(src_path, src_dict, encoder_embeddings)

    logger.info("Writing target embeddings")
    tgt_path = opt.output_dir + "/tgt_embeddings.txt"
    write_embeddings(tgt_path, tgt_dict, decoder_embeddings)

    logger.info('... done.')
    logger.info('Converting model...')
開發者ID:OpenNMT,項目名稱:OpenNMT-py,代碼行數:47,代碼來源:extract_embeddings.py


注:本文中的onmt.inputters方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。