This article collects typical usage examples of the Python torchtext.data.BucketIterator method. If you have been wondering how data.BucketIterator is used in practice, the curated examples here should help. You can also explore other usage examples from the containing module, torchtext.data.
Below are 15 code examples of data.BucketIterator, drawn from open-source projects and ordered by popularity.
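Before diving into the examples, here is a minimal, self-contained sketch of what the method does (this sketch is illustrative and not taken from any of the projects below; the field names and the example.tsv file are assumptions): BucketIterator batches examples of similar length together, which minimizes the amount of padding per batch.

import torch
from torchtext import data

# Define how raw columns are processed (illustrative fields).
TEXT = data.Field(tokenize=str.split, lower=True)
LABEL = data.Field(sequential=False, unk_token=None)

# "example.tsv" is a hypothetical file with text<TAB>label rows.
dataset = data.TabularDataset("example.tsv", format="tsv",
                              fields=[("text", TEXT), ("label", LABEL)])
TEXT.build_vocab(dataset)
LABEL.build_vocab(dataset)

# Bucketing groups examples of similar length into the same batch.
iterator = data.BucketIterator(dataset, batch_size=32,
                               sort_key=lambda x: len(x.text),
                               shuffle=True,
                               device=torch.device("cpu"))
for batch in iterator:
    print(batch.text.shape)  # (seq_len, batch_size) by default
    break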
Example 1: build_bucket_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import torch)
def build_bucket_iterator(dataset, device, batch_size, is_train):
    device_obj = None if device is None else torch.device(device)
    iterator = data.BucketIterator(
        dataset=dataset,
        batch_size=batch_size,
        repeat=False,
        sort_key=dataset.sort_key,
        sort=False,
        # sorts the data within each minibatch in decreasing order;
        # set to True if you want to use pack_padded_sequence
        sort_within_batch=is_train,
        # shuffle batches
        shuffle=is_train,
        device=device_obj,
        train=is_train,
    )
    return iterator
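Since the comments above mention pack_padded_sequence, here is a hedged sketch of how a sorted batch from such an iterator is typically consumed. The field name src and its include_lengths=True setup are assumptions, not part of the example above:

from torch.nn.utils.rnn import pack_padded_sequence

# Assumes the source Field was built with include_lengths=True,
# so each batch attribute is a (padded_tensor, lengths) pair.
for batch in iterator:
    padded, lengths = batch.src  # hypothetical field name
    # sort_within_batch=True guarantees lengths arrive in decreasing
    # order, which pack_padded_sequence requires by default.
    packed = pack_padded_sequence(padded, lengths.cpu())
    break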
Example 2: load_dataloaders
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, import torchtext, plus the project's
#  logger, dum_tokenizer and tokenize_data helpers)
def load_dataloaders(args):
    logger.info("Preparing dataloaders...")
    FR = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, init_token="<sos>",
                              eos_token="<eos>", batch_first=True)
    EN = torchtext.data.Field(tokenize=dum_tokenizer, lower=True, batch_first=True)
    train_path = os.path.join("./data/", "df.csv")
    if not os.path.isfile(train_path):
        tokenize_data(args)
    train = torchtext.data.TabularDataset(train_path, format="csv",
                                          fields=[("EN", EN), ("FR", FR)])
    FR.build_vocab(train)
    EN.build_vocab(train)
    # Example columns are attributes, hence x.EN rather than x["EN"]
    train_iter = BucketIterator(train, batch_size=args.batch_size, repeat=False,
                                sort_key=lambda x: (len(x.EN), len(x.FR)),
                                shuffle=True, train=True)
    train_length = len(train)
    logger.info("Loaded dataloaders.")
    return train_iter, FR, EN, train_length
Example 3: prepare_dataloaders
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import pickle, from torchtext.data import Dataset,
#  and the project's Constants module)
def prepare_dataloaders(opt, device):
    batch_size = opt.batch_size
    # note: the local name `data` shadows the torchtext.data alias,
    # so BucketIterator and Dataset must be imported directly
    data = pickle.load(open(opt.data_pkl, 'rb'))

    opt.max_token_seq_len = data['settings'].max_len
    opt.src_pad_idx = data['vocab']['src'].vocab.stoi[Constants.PAD_WORD]
    opt.trg_pad_idx = data['vocab']['trg'].vocab.stoi[Constants.PAD_WORD]
    opt.src_vocab_size = len(data['vocab']['src'].vocab)
    opt.trg_vocab_size = len(data['vocab']['trg'].vocab)

    #========= Preparing Model =========#
    if opt.embs_share_weight:
        assert data['vocab']['src'].vocab.stoi == data['vocab']['trg'].vocab.stoi, \
            'To share word embeddings, the src/trg word2idx tables must be the same.'

    fields = {'src': data['vocab']['src'], 'trg': data['vocab']['trg']}
    train = Dataset(examples=data['train'], fields=fields)
    val = Dataset(examples=data['valid'], fields=fields)
    train_iterator = BucketIterator(train, batch_size=batch_size, device=device, train=True)
    val_iterator = BucketIterator(val, batch_size=batch_size, device=device)
    return train_iterator, val_iterator
Example 4: csv_data_loader
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, plus the project's load_tabular_set helper)
def csv_data_loader(file_path, fields, split_ratio=None, split_seed=None, skip_header=False,
                    save_vocab_path=os.getcwd(), batch_size=32, device=None, train=True, **args):
    """
    :param file_path: path to the CSV file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional ratio for splitting the data
    :param split_seed: random seed for the split
    :param skip_header: whether to skip the first row of the file
    :param save_vocab_path: directory in which to save the vocabulary
    :param batch_size: number of examples per batch
    :param device: device on which to create batches
    :param train: whether the loader is used for training (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "csv", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
Example 5: csv_data_split_loader
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, plus the project's load_tabular_set_split helper)
def csv_data_split_loader(root_path, fields, train=None, val=None, test=None, skip_header=False,
                          save_vocab_path=os.getcwd(), batch_size=32, device=None, **args):
    """
    :param root_path: directory containing the split files
    :param fields: torchtext fields describing the columns
    :param train: filename of the training split
    :param val: filename of the validation split
    :param test: filename of the test split
    :param skip_header: whether to skip the first row of each file
    :param save_vocab_path: directory in which to save the vocabulary
    :param batch_size: number of examples per batch
    :param device: device on which to create batches
    :param args: extra keyword arguments forwarded to load_tabular_set_split
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set_split(root_path, "csv", fields=fields, train=train, val=val,
                                     test=test, skip_header=skip_header,
                                     save_vocab_path=save_vocab_path, **args)
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
Example 6: tsv_data_loader
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, plus the project's load_tabular_set helper)
def tsv_data_loader(file_path, fields, split_ratio=None, split_seed=None, skip_header=False,
                    save_vocab_path=os.getcwd(), batch_size=32, device=None, train=True, **args):
    """
    :param file_path: path to the TSV file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional ratio for splitting the data
    :param split_seed: random seed for the split
    :param skip_header: whether to skip the first row of the file
    :param save_vocab_path: directory in which to save the vocabulary
    :param batch_size: number of examples per batch
    :param device: device on which to create batches
    :param train: whether the loader is used for training (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "tsv", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
Example 7: tsv_data_split_loader
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, plus the project's load_tabular_set_split helper)
def tsv_data_split_loader(root_path, fields, train=None, val=None, test=None, skip_header=False,
                          save_vocab_path=os.getcwd(), batch_size=32, device=None, **args):
    """
    :param root_path: directory containing the split files
    :param fields: torchtext fields describing the columns
    :param train: filename of the training split
    :param val: filename of the validation split
    :param test: filename of the test split
    :param skip_header: whether to skip the first row of each file
    :param save_vocab_path: directory in which to save the vocabulary
    :param batch_size: number of examples per batch
    :param device: device on which to create batches
    :param args: extra keyword arguments forwarded to load_tabular_set_split
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set_split(root_path, "tsv", fields=fields, train=train, val=val,
                                     test=test, skip_header=skip_header,
                                     save_vocab_path=save_vocab_path, **args)
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
Example 8: json_data_loader
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import os, plus the project's load_tabular_set helper)
def json_data_loader(file_path, fields, split_ratio=None, split_seed=None, skip_header=False,
                     save_vocab_path=os.getcwd(), batch_size=32, device=None, train=True, **args):
    """
    :param file_path: path to the JSON file
    :param fields: torchtext fields describing the columns
    :param split_ratio: optional ratio for splitting the data
    :param split_seed: random seed for the split
    :param skip_header: whether to skip the first row of the file
    :param save_vocab_path: directory in which to save the vocabulary
    :param batch_size: number of examples per batch
    :param device: device on which to create batches
    :param train: whether the loader is used for training (enables shuffling)
    :param args: extra keyword arguments forwarded to load_tabular_set
    :return: a BucketIterator over the dataset
    """
    dataset = load_tabular_set(file_path, "json", fields=fields, split_ratio=split_ratio,
                               split_seed=split_seed, skip_header=skip_header,
                               save_vocab_path=save_vocab_path, **args)
    return BucketIterator(dataset, batch_size=batch_size, device=device,
                          train=True, shuffle=train, repeat=False)
Example 9: __init__
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
# (also requires: import torch, from torchtext.vocab import Vectors)
def __init__(self, args):
    self.RAW = data.RawField()
    self.RAW.is_target = False
    tokenize = lambda x: list(x)  # character-level tokenization
    self.TEXT = data.Field(batch_first=True, tokenize=tokenize)
    self.LABEL = data.Field(sequential=False, unk_token=None)
    self.train, self.dev, self.test = data.TabularDataset.splits(
        path='/data/nfsdata/nlp/datasets/sentence_pair/bq_corpus_torch10',
        train='BQ_train.json',
        validation='BQ_dev.json',
        test='BQ_test.json',
        format='json',
        fields={"gold_label": ("label", self.LABEL),
                "sentence1": ("q1", self.TEXT),
                "sentence2": ("q2", self.TEXT),
                "ID": ("id", self.RAW)})
    self.TEXT.build_vocab(self.train, self.dev, self.test, vectors=Vectors("BQ300", args.data))
    self.LABEL.build_vocab(self.train)

    # interleave_keys balances bucketing across both sentence lengths
    sort_key = lambda x: data.interleave_keys(len(x.q1), len(x.q2))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.train_iter = data.BucketIterator(self.train, batch_size=args.batch_size,
                                          device=device, sort_key=sort_key, sort=True)
    self.dev_iter = data.BucketIterator(self.dev, batch_size=args.batch_size,
                                        device=device, sort_key=sort_key, sort=True)
    self.test_iter = data.BucketIterator(self.test, batch_size=args.batch_size,
                                         device=device, sort_key=sort_key, sort=True)
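A hedged sketch of how the iterators built above might be consumed; the loop body is illustrative and not taken from the project, and `loader` is assumed to be an instance of the class containing this __init__:

for batch in loader.train_iter:
    q1, q2 = batch.q1, batch.q2  # (batch, seq_len) since batch_first=True
    labels = batch.label         # class indices from LABEL's vocab
    # feed (q1, q2) into a sentence-pair model, compare against labels
    break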
Example 10: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.texta)):
    return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key)
Example 11: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.text), sort_within_batch=True):
    return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                          sort_within_batch=sort_within_batch)
Example 12: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE):
    return BucketIterator(dataset, batch_size=batch_size, device=device)
Example 13: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.word), sort_within_batch=True):
    return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                          sort_within_batch=sort_within_batch)
Example 14: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.query), sort_within_batch=True):
    return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                          sort_within_batch=sort_within_batch)
Example 15: get_iterator
# Required import: from torchtext import data [as alias]
# Or: from torchtext.data import BucketIterator [as alias]
def get_iterator(self, dataset: Dataset, batch_size=DEFAULT_CONFIG['batch_size'], device=DEVICE,
                 sort_key=lambda x: len(x.source), sort_within_batch=True):
    return BucketIterator(dataset, batch_size=batch_size, device=device, sort_key=sort_key,
                          sort_within_batch=sort_within_batch)
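A final note not tied to any single example above: torchtext 0.9.0 moved these APIs into a legacy namespace, and torchtext 0.12.0 removed them entirely. If the imports in the examples fail on a newer install, the following fallback (a sketch, assuming torchtext 0.9-0.11) keeps them working:

try:
    from torchtext.data import BucketIterator  # torchtext < 0.9
    from torchtext import data
except ImportError:
    # torchtext 0.9-0.11 keeps the old APIs under torchtext.legacy
    from torchtext.legacy.data import BucketIterator
    from torchtext.legacy import data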