當前位置: 首頁>>代碼示例>>Python>>正文


Python data.BucketIterator方法代碼示例

本文整理匯總了Python中torchtext.data.BucketIterator方法的典型用法代碼示例。如果您正苦於以下問題:Python data.BucketIterator方法的具體用法?Python data.BucketIterator怎麽用?Python data.BucketIterator使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在torchtext.data的用法示例。


在下文中一共展示了data.BucketIterator方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。

示例1: build_bucket_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def build_bucket_iterator(dataset, device, batch_size, is_train):
    """Create a torchtext BucketIterator over *dataset*.

    Training iterators shuffle batches and sort examples inside each
    minibatch (decreasing length, as required by pack_padded_sequence);
    evaluation iterators keep the incoming order.
    """
    torch_device = torch.device(device) if device is not None else None
    iterator_kwargs = dict(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        sort_key=dataset.sort_key,
        sort=False,
        # sorts the data within each minibatch in decreasing order;
        # needed when using pack_padded_sequences
        sort_within_batch=is_train,
        # shuffle batches only while training
        shuffle=is_train,
        device=torch_device,
        train=is_train,
    )
    return data.BucketIterator(**iterator_kwargs)
開發者ID:Unbabel,項目名稱:OpenKiwi,代碼行數:19,代碼來源:iterators.py

示例2: load_dataloaders

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def load_dataloaders(args):
    """Build the FR/EN fields and a training BucketIterator from df.csv.

    Tokenizes the raw data first (via tokenize_data) if ./data/df.csv is
    missing, then builds vocabularies for both fields on the training set.

    :param args: namespace providing at least `batch_size` (and whatever
                 tokenize_data needs).
    :return: (train_iter, FR, EN, train_length)
    """
    logger.info("Preparing dataloaders...")
    FR = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, init_token="<sos>", eos_token="<eos>",
                              batch_first=True)
    EN = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, batch_first=True)

    train_path = os.path.join("./data/", "df.csv")
    if not os.path.isfile(train_path):
        tokenize_data(args)
    train = torchtext.data.TabularDataset(train_path, format="csv",
                                          fields=[("EN", EN), ("FR", FR)])
    FR.build_vocab(train)
    EN.build_vocab(train)
    # BUG FIX: torchtext Example objects expose fields as attributes, not
    # mapping keys — x["EN"] raises TypeError whenever the sort_key is used.
    train_iter = BucketIterator(train, batch_size=args.batch_size, repeat=False,
                                sort_key=lambda x: (len(x.EN), len(x.FR)),
                                shuffle=True, train=True)
    train_length = len(train)
    logger.info("Loaded dataloaders.")
    return train_iter, FR, EN, train_length
開發者ID:plkmo,項目名稱:NLP_Toolkit,代碼行數:20,代碼來源:preprocessing_funcs.py

示例3: prepare_dataloaders

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def prepare_dataloaders(opt, device):
    """Load the preprocessed pickle and build train/validation BucketIterators.

    Side effects: fills in opt.max_token_seq_len, opt.src_pad_idx,
    opt.trg_pad_idx, opt.src_vocab_size and opt.trg_vocab_size from the
    pickled settings/vocab, and asserts shared src/trg vocab when
    opt.embs_share_weight is set.

    :param opt: options namespace with data_pkl, batch_size, embs_share_weight.
    :param device: device the iterators place batches on.
    :return: (train_iterator, val_iterator)
    """
    batch_size = opt.batch_size
    # BUG FIX: pickle.load(open(...)) leaked the file handle; close it
    # deterministically. (pickle.load on untrusted files is unsafe — this
    # assumes opt.data_pkl is a trusted, self-produced artifact.)
    with open(opt.data_pkl, 'rb') as f:
        data = pickle.load(f)

    opt.max_token_seq_len = data['settings'].max_len
    opt.src_pad_idx = data['vocab']['src'].vocab.stoi[Constants.PAD_WORD]
    opt.trg_pad_idx = data['vocab']['trg'].vocab.stoi[Constants.PAD_WORD]

    opt.src_vocab_size = len(data['vocab']['src'].vocab)
    opt.trg_vocab_size = len(data['vocab']['trg'].vocab)

    #========= Preparing Model =========#
    if opt.embs_share_weight:
        assert data['vocab']['src'].vocab.stoi == data['vocab']['trg'].vocab.stoi, \
            'To sharing word embedding the src/trg word2idx table shall be the same.'

    fields = {'src': data['vocab']['src'], 'trg': data['vocab']['trg']}

    train = Dataset(examples=data['train'], fields=fields)
    val = Dataset(examples=data['valid'], fields=fields)

    train_iterator = BucketIterator(train, batch_size=batch_size, device=device, train=True)
    # NOTE(review): torchtext iterators default to train=True, so this
    # validation iterator also shuffles its batches — confirm whether
    # train=False was intended here.
    val_iterator = BucketIterator(val, batch_size=batch_size, device=device)

    return train_iterator, val_iterator
開發者ID:jadore801120,項目名稱:attention-is-all-you-need-pytorch,代碼行數:27,代碼來源:train.py

示例4: csv_data_loader

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def csv_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """Load a CSV dataset and wrap it in a BucketIterator.

    :param file_path: path to the csv file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional split ratio forwarded to load_tabular_set
    :param split_seed: random seed used when splitting
    :param skip_header: skip the first row of the file
    :param save_vocab_path: directory where the vocabulary is saved
    :param batch_size: number of examples per batch
    :param device: device the iterator places batches on
    :param train: True for a training iterator (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "csv", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    # BUG FIX: `train` was hard-coded to True, ignoring the caller's flag
    # while shuffle already honoured it.
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=train, shuffle=train, repeat=False)
開發者ID:johnolafenwa,項目名稱:TorchFusion,代碼行數:19,代碼來源:datasets.py

示例5: csv_data_split_loader

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def csv_data_split_loader(root_path,fields,train=None,val=None,test=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,**args):
    """Load pre-split CSV files from *root_path* and wrap them in a BucketIterator.

    :param root_path: directory containing the split files
    :param fields: torchtext fields describing the columns
    :param train: train split file name
    :param val: validation split file name
    :param test: test split file name
    :param skip_header: skip the first row of each file
    :param save_vocab_path: directory where the vocabulary is saved
    :param batch_size: number of examples per batch
    :param device: device the iterator places batches on
    :param args: extra keyword arguments forwarded to load_tabular_set_split
    :return: a BucketIterator over the loaded splits
    """
    splits = load_tabular_set_split(root_path, "csv", fields=fields, train=train,
                                    val=val, test=test, skip_header=skip_header,
                                    save_vocab_path=save_vocab_path, **args)
    # NOTE(review): `train` here is a file name, so shuffle=train merely
    # enables shuffling whenever a train split path was given — confirm
    # this truthiness use is intended.
    return BucketIterator(splits, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
開發者ID:johnolafenwa,項目名稱:TorchFusion,代碼行數:19,代碼來源:datasets.py

示例6: tsv_data_loader

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def tsv_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """Load a TSV dataset and wrap it in a BucketIterator.

    :param file_path: path to the tsv file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional split ratio forwarded to load_tabular_set
    :param split_seed: random seed used when splitting
    :param skip_header: skip the first row of the file
    :param save_vocab_path: directory where the vocabulary is saved
    :param batch_size: number of examples per batch
    :param device: device the iterator places batches on
    :param train: True for a training iterator (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "tsv", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    # BUG FIX: `train` was hard-coded to True, ignoring the caller's flag
    # while shuffle already honoured it.
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=train, shuffle=train, repeat=False)
開發者ID:johnolafenwa,項目名稱:TorchFusion,代碼行數:19,代碼來源:datasets.py

示例7: tsv_data_split_loader

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def tsv_data_split_loader(root_path,fields,train=None,val=None,test=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,**args):
    """Load pre-split TSV files from *root_path* and wrap them in a BucketIterator.

    :param root_path: directory containing the split files
    :param fields: torchtext fields describing the columns
    :param train: train split file name
    :param val: validation split file name
    :param test: test split file name
    :param skip_header: skip the first row of each file
    :param save_vocab_path: directory where the vocabulary is saved
    :param batch_size: number of examples per batch
    :param device: device the iterator places batches on
    :param args: extra keyword arguments forwarded to load_tabular_set_split
    :return: a BucketIterator over the loaded splits
    """
    splits = load_tabular_set_split(root_path, "tsv", fields=fields, train=train,
                                    val=val, test=test, skip_header=skip_header,
                                    save_vocab_path=save_vocab_path, **args)
    # NOTE(review): `train` here is a file name, so shuffle=train merely
    # enables shuffling whenever a train split path was given — confirm
    # this truthiness use is intended.
    return BucketIterator(splits, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
開發者ID:johnolafenwa,項目名稱:TorchFusion,代碼行數:19,代碼來源:datasets.py

示例8: json_data_loader

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def json_data_loader(file_path,fields,split_ratio=None,split_seed=None,skip_header=False,save_vocab_path=os.getcwd(),batch_size=32,device=None,train=True,**args):
    """Load a JSON dataset and wrap it in a BucketIterator.

    :param file_path: path to the json file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional split ratio forwarded to load_tabular_set
    :param split_seed: random seed used when splitting
    :param skip_header: skip the first row of the file
    :param save_vocab_path: directory where the vocabulary is saved
    :param batch_size: number of examples per batch
    :param device: device the iterator places batches on
    :param train: True for a training iterator (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "json", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    # BUG FIX: `train` was hard-coded to True, ignoring the caller's flag
    # while shuffle already honoured it.
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=train, shuffle=train, repeat=False)
開發者ID:johnolafenwa,項目名稱:TorchFusion,代碼行數:19,代碼來源:datasets.py

示例9: __init__

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def __init__(self, args):
        """Load the BQ sentence-pair corpus and build train/dev/test iterators.

        :param args: namespace providing `batch_size` and `data` (directory
                     containing the pretrained "BQ300" vector cache).
        """
        # RAW passes the example ID through batching untouched; it is not a target.
        self.RAW = data.RawField()
        self.RAW.is_target = False
        # Character-level tokenization: each character becomes one token.
        tokenize = lambda x: list(x)
        self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
        self.LABEL = data.Field(sequential=False, unk_token=None)
        # JSON fields are mapped onto (attribute_name, Field) pairs.
        self.train, self.dev, self.test = data.TabularDataset.splits(
            path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
            train='BQ_train.json',
            validation='BQ_dev.json',
            test='BQ_test.json',
            format='json',
            fields={"gold_label": ("label", self.LABEL),
                    "sentence1": ("q1", self.TEXT),
                    "sentence2": ("q2", self.TEXT),
                    "ID": ("id", self.RAW)})

        # Text vocab is built over all three splits with "BQ300" vectors;
        # label vocab only over the training split.
        self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
        self.LABEL.build_vocab(self.train)

        # Sort by interleaved q1/q2 lengths so batched pairs have similar sizes.
        sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
        self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size, device=device, sort_key=sort_key, sort=True)
開發者ID:ShannonAI,項目名稱:glyce,代碼行數:27,代碼來源:semantic_similar_data.py

示例10: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.texta)):
        """Wrap *dataset* in a BucketIterator bucketing by `texta` length."""
        iterator = BucketIterator(dataset, batch_size=batch_size,
                                  device=device, sort_key=sort_key)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:5,代碼來源:tool.py

示例11: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.text), sort_within_batch=True):
        """Wrap *dataset* in a BucketIterator bucketing by `text` length,
        sorting examples inside each minibatch."""
        iterator = BucketIterator(dataset, batch_size=batch_size, device=device,
                                  sort_key=sort_key,
                                  sort_within_batch=sort_within_batch)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:6,代碼來源:tool.py

示例12: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE):
        """Wrap *dataset* in a plain BucketIterator."""
        iterator = BucketIterator(dataset, batch_size=batch_size, device=device)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:4,代碼來源:tool.py

示例13: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.word), sort_within_batch=True):
        """Wrap *dataset* in a BucketIterator bucketing by `word` length,
        sorting examples inside each minibatch."""
        iterator = BucketIterator(dataset, batch_size=batch_size, device=device,
                                  sort_key=sort_key,
                                  sort_within_batch=sort_within_batch)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:6,代碼來源:tool.py

示例14: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.query), sort_within_batch=True):
        """Wrap *dataset* in a BucketIterator bucketing by `query` length,
        sorting examples inside each minibatch."""
        iterator = BucketIterator(dataset, batch_size=batch_size, device=device,
                                  sort_key=sort_key,
                                  sort_within_batch=sort_within_batch)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:6,代碼來源:tool.py

示例15: get_iterator

# 需要導入模塊: from torchtext import data [as 別名]
# 或者: from torchtext.data import BucketIterator [as 別名]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                     sort_key=lambda x: len(x.source), sort_within_batch=True):
        """Wrap *dataset* in a BucketIterator bucketing by `source` length,
        sorting examples inside each minibatch."""
        iterator = BucketIterator(dataset, batch_size=batch_size, device=device,
                                  sort_key=sort_key,
                                  sort_within_batch=sort_within_batch)
        return iterator
開發者ID:smilelight,項目名稱:lightNLP,代碼行數:6,代碼來源:tool.py


注:本文中的torchtext.data.BucketIterator方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。