

Python data.BucketIterator Method Code Examples

This article collects typical usage examples of the Python method torchtext.data.BucketIterator. If you are wondering what data.BucketIterator does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also explore further usage examples from the torchtext.data module.


The following presents 15 code examples of data.BucketIterator, sorted by popularity by default.

Example 1: build_bucket_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def build_bucket_iterator(dataset, device, batch_size, is_train):
    device_obj = None if device is None else torch.device(device)
    iterator = data.BucketIterator(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        sort_key=dataset.sort_key,
        sort=False,
        # sorts the examples within each minibatch in decreasing order;
        # set to True if you want to use pack_padded_sequence
        sort_within_batch=is_train,
        # shuffle batches
        shuffle=is_train,
        device=device_obj,
        train=is_train,
    )
    return iterator 
Developer: Unbabel, Project: OpenKiwi, Lines: 19, Source file: iterators.py
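
For context, here is a minimal usage sketch; train_dataset below is hypothetical, and any torchtext Dataset whose class defines a sort_key will work:

# Hypothetical usage of build_bucket_iterator (train_dataset is any torchtext
# Dataset with a sort_key, e.g. a TranslationDataset):
train_iter = build_bucket_iterator(train_dataset, device="cuda", batch_size=32, is_train=True)
for batch in train_iter:
    # each Field of the dataset becomes a batch attribute holding a padded tensor
    pass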

Example 2: load_dataloaders

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def load_dataloaders(args):
    logger.info("Preparing dataloaders...")
    FR = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, init_token="<sos>", eos_token="<eos>",\
                              batch_first=True)
    EN = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, batch_first=True)
    
    train_path = os.path.join("./data/", "df.csv")
    if not os.path.isfile(train_path):
        tokenize_data(args)
    train = torchtext.data.TabularDataset(train_path, format="csv", \
                                             fields=[("EN", EN), ("FR", FR)])
    FR.build_vocab(train)
    EN.build_vocab(train)
    train_iter = BucketIterator(train, batch_size=args.batch_size, repeat=False,
                                sort_key=lambda x: (len(x.EN), len(x.FR)),  # Example attributes, not dict keys
                                shuffle=True, train=True)
    train_length = len(train)
    logger.info("Loaded dataloaders.")
    return train_iter, FR, EN, train_length 
Developer: plkmo, Project: NLP_Toolkit, Lines: 20, Source file: preprocessing_funcs.py
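
Since both Fields are built with batch_first=True, every batch exposes padded LongTensors of shape (batch_size, seq_len). A sketch of consuming the returned iterator (the loop body is a hypothetical placeholder):

train_iter, FR, EN, train_length = load_dataloaders(args)
for batch in train_iter:
    src, trg = batch.EN, batch.FR  # padded LongTensors of shape (batch_size, seq_len)
    # feed src/trg to the model here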

Example 3: prepare_dataloaders

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def prepare_dataloaders(opt, device):
    batch_size = opt.batch_size
    data = pickle.load(open(opt.data_pkl, 'rb'))

    opt.max_token_seq_len = data['settings'].max_len
    opt.src_pad_idx = data['vocab']['src'].vocab.stoi[Constants.PAD_WORD]
    opt.trg_pad_idx = data['vocab']['trg'].vocab.stoi[Constants.PAD_WORD]

    opt.src_vocab_size = len(data['vocab']['src'].vocab)
    opt.trg_vocab_size = len(data['vocab']['trg'].vocab)

    #========= Preparing Model =========#
    if opt.embs_share_weight:
        assert data['vocab']['src'].vocab.stoi == data['vocab']['trg'].vocab.stoi, \
            'To share word embeddings, the src and trg word2idx tables must be identical.'

    fields = {'src': data['vocab']['src'], 'trg':data['vocab']['trg']}

    train = Dataset(examples=data['train'], fields=fields)
    val = Dataset(examples=data['valid'], fields=fields)

    train_iterator = BucketIterator(train, batch_size=batch_size, device=device, train=True)
    val_iterator = BucketIterator(val, batch_size=batch_size, device=device)

    return train_iterator, val_iterator 
Developer: jadore801120, Project: attention-is-all-you-need-pytorch, Lines: 27, Source file: train.py
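
A sketch of how the returned iterators might be consumed; the attribute names src and trg follow the fields dict above, and the loop body is a placeholder:

train_iterator, val_iterator = prepare_dataloaders(opt, device)
for batch in train_iterator:
    src_seq = batch.src  # padded with opt.src_pad_idx
    trg_seq = batch.trg  # padded with opt.trg_pad_idx
    # forward/backward pass goes here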

Example 4: csv_data_loader

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def csv_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """

    :param file_path:
    :param fields:
    :param split_ratio:
    :param split_seed:
    :param skip_header:
    :param save_vocab_path:
    :param batch_size:
    :param device:
    :param train:
    :param args:
    :return:
    """
    dataset = load_tabular_set(file_path,"csv",fields=fields,split_ratio=split_ratio,split_seed=split_seed,skip_header=skip_header,save_vocab_path=save_vocab_path,**args)
    return BucketIterator(dataset,batch_size=batch_size,device=device,train=True,shuffle=train,repeat=False) 
Developer: johnolafenwa, Project: TorchFusion, Lines: 19, Source file: datasets.py
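
A hypothetical call, assuming a two-column CSV of text and label; the Field definitions are standard torchtext, while load_tabular_set (including any vocabulary building it performs) is TorchFusion's own helper:

from torchtext import data

# Hypothetical fields for a CSV with a text column and a label column.
TEXT = data.Field(lower=True, batch_first=True)
LABEL = data.Field(sequential=False, unk_token=None)
train_iter = csv_data_loader("train.csv", fields=[("text", TEXT), ("label", LABEL)],
                             batch_size=32, train=True)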

Example 5: csv_data_split_loader

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def csv_data_split_loader(root_path,fields,train=None,val=None,test=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,**args):
    """

    :param root_path:
    :param fields:
    :param train:
    :param val:
    :param test:
    :param skip_header:
    :param save_vocab_path:
    :param batch_size:
    :param device:
    :param args:
    :return:
    """
    dataset = load_tabular_set_split(root_path,"csv",fields=fields,train=train,val=val, test=test,skip_header=skip_header,save_vocab_path=save_vocab_path,**args)
    return BucketIterator(dataset, batch_size=batch_size, device=device, train=True, shuffle=train,repeat=False) 
Developer: johnolafenwa, Project: TorchFusion, Lines: 19, Source file: datasets.py

Example 6: tsv_data_loader

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def tsv_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """

    :param file_path:
    :param fields:
    :param split_ratio:
    :param split_seed:
    :param skip_header:
    :param save_vocab_path:
    :param batch_size:
    :param device:
    :param train:
    :param args:
    :return:
    """
    dataset = load_tabular_set(file_path,"tsv",fields=fields,split_ratio=split_ratio,split_seed=split_seed,skip_header=skip_header,save_vocab_path=save_vocab_path,**args)
    return BucketIterator(dataset, batch_size=batch_size, device=device, train=True, shuffle=train,repeat=False) 
Developer: johnolafenwa, Project: TorchFusion, Lines: 19, Source file: datasets.py

Example 7: tsv_data_split_loader

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def tsv_data_split_loader(root_path,fields,train=None,val=None,test=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,**args):
    """

    :param root_path:
    :param fields:
    :param train:
    :param val:
    :param test:
    :param skip_header:
    :param save_vocab_path:
    :param batch_size:
    :param device:
    :param args:
    :return:
    """
    dataset = load_tabular_set_split(root_path,"tsv",fields=fields,train=train,val=val,test=test,skip_header=skip_header,save_vocab_path=save_vocab_path,**args)
    return BucketIterator(dataset, batch_size=batch_size, device=device, train=True, shuffle=train,repeat=False) 
Developer: johnolafenwa, Project: TorchFusion, Lines: 19, Source file: datasets.py

Example 8: json_data_loader

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def json_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """

    :param file_path:
    :param fields:
    :param split_ratio:
    :param split_seed:
    :param skip_header:
    :param save_vocab_path:
    :param batch_size:
    :param device:
    :param train:
    :param args:
    :return:
    """
    dataset = load_tabular_set(file_path,"json",fields=fields,split_ratio=split_ratio,split_seed=split_seed,skip_header=skip_header,save_vocab_path=save_vocab_path,**args)
    return BucketIterator(dataset, batch_size=batch_size, device=device, train=True, shuffle=train,repeat=False) 
Developer: johnolafenwa, Project: TorchFusion, Lines: 19, Source file: datasets.py

Example 9: __init__

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def __init__(self, args):
        self.RAW = data.RawField()
        self.RAW.is_target = False
        tokenize = lambda x: list(x)
        self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
        self.LABEL = data.Field(sequential=False, unk_token=None)
        self.train, self.dev, self.test = data.TabularDataset.splits(
            path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
            train='BQ_train.json',
            validation='BQ_dev.json',
            test='BQ_test.json',
            format='json',
            fields={"gold_label": ("label", self.LABEL),
                    "sentence1": ("q1", self.TEXT),
                    "sentence2": ("q2", self.TEXT),
                    "ID": ("id", self.RAW)})

        self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
        self.LABEL.build_vocab(self.train)

        sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True) 
Developer: ShannonAI, Project: glyce, Lines: 27, Source file: semantic_similar_data.py
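
Consuming these iterators could look like the sketch below (the loop body is hypothetical); q1, q2 and label follow the field names declared above:

# Hypothetical consumption, e.g. inside a method of the same class:
for batch in self.train_iter:
    q1, q2, labels = batch.q1, batch.q2, batch.label
    # model forward pass goes here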

Example 10: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.texta)):
        return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key) 
Developer: smilelight, Project: lightNLP, Lines: 5, Source file: tool.py

Example 11: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.text), sort_within_batch=True):
        return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                              sort_within_batch=sort_within_batch) 
Developer: smilelight, Project: lightNLP, Lines: 6, Source file: tool.py

Example 12: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE):
        return BucketIterator(dataset, batch_size=batch_size, device=device) 
Developer: smilelight, Project: lightNLP, Lines: 4, Source file: tool.py

Example 13: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.word), sort_within_batch=True):
        return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                              sort_within_batch=sort_within_batch) 
Developer: smilelight, Project: lightNLP, Lines: 6, Source file: tool.py

Example 14: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.query), sort_within_batch=True):
        return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                              sort_within_batch=sort_within_batch) 
Developer: smilelight, Project: lightNLP, Lines: 6, Source file: tool.py

Example 15: get_iterator

# Required import: from torchtext import data
# Alternatively: from torchtext.data import BucketIterator
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.source), sort_within_batch=True):
        return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                              sort_within_batch=sort_within_batch) 
Developer: smilelight, Project: lightNLP, Lines: 6, Source file: tool.py
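
In these examples, sort_within_batch=True orders the examples of each minibatch by decreasing sort-key length, which is exactly what torch.nn.utils.rnn.pack_padded_sequence expects. A minimal sketch, assuming a Field named source with the default batch_first=False and a known padding index pad_idx:

import torch.nn as nn

for batch in iterator:
    text = batch.source                               # shape: (seq_len, batch_size)
    lengths = (text != pad_idx).sum(dim=0).cpu()      # true length of each example
    packed = nn.utils.rnn.pack_padded_sequence(text, lengths)  # requires sorted lengths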


Note: The torchtext.data.BucketIterator examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. For distribution and use, refer to the License of the corresponding project. Do not reproduce without permission.