

Python data.FairseqDataset Code Examples

This article collects typical usage examples of the fairseq.data.FairseqDataset class from open-source Python projects. If you are wondering what data.FairseqDataset is or how it is used in practice, the curated examples below should help; you can also explore further usage examples from the containing module, fairseq.data.


Ten code examples of data.FairseqDataset are shown below, ordered by popularity by default.
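Before the examples, here is a minimal, hypothetical sketch of what a FairseqDataset subclass provides. The interface methods (__getitem__, __len__, collater, num_tokens, size, ordered_indices) are the ones fairseq's batching machinery relies on; ToyDataset and the toy data are illustrative assumptions, not fairseq code.

import numpy as np
import torch
from fairseq.data import FairseqDataset

class ToyDataset(FairseqDataset):
    """Hypothetical wrapper around a list of pre-tokenized integer sequences."""

    def __init__(self, sequences):
        self.sequences = [torch.tensor(s, dtype=torch.long) for s in sequences]

    def __getitem__(self, index):
        return self.sequences[index]

    def __len__(self):
        return len(self.sequences)

    def collater(self, samples):
        # Pad every sample to the longest one in the batch (pad index 0 assumed).
        max_len = max(len(s) for s in samples)
        batch = torch.zeros(len(samples), max_len, dtype=torch.long)
        for i, s in enumerate(samples):
            batch[i, : len(s)] = s
        return batch

    def num_tokens(self, index):
        return len(self.sequences[index])

    def size(self, index):
        return len(self.sequences[index])

    def ordered_indices(self):
        # Sort by length so batches contain similarly sized examples.
        return np.argsort([len(s) for s in self.sequences])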

Example 1: dataset

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def dataset(self, split):
    """
    Return a loaded dataset split.

    Args:
        split (str): name of the split (e.g., train, valid, test)

    Returns:
        a :class:`~fairseq.data.FairseqDataset` corresponding to *split*
    """
    from fairseq.data import FairseqDataset

    if split not in self.datasets:
        raise KeyError("Dataset not loaded: " + split)
    if not isinstance(self.datasets[split], FairseqDataset):
        raise TypeError("Datasets are expected to be of type FairseqDataset")
    return self.datasets[split]
Developer: pytorch, Project: fairseq, Source: fairseq_task.py
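As a usage sketch (the task object and split name are assumptions, not part of the snippet above), the accessor is typically called only after the corresponding split has been loaded:

# Hypothetical usage; assumes `task` is an instantiated fairseq task.
task.load_dataset('valid')           # populates task.datasets['valid']
valid_set = task.dataset('valid')    # returns the FairseqDataset, or raises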

Example 2: dataset

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def dataset(self, split):
    """
    Return a loaded dataset split.

    Args:
        split (str): name of the split (e.g., train, valid, test)

    Returns:
        a :class:`~fairseq.data.FairseqDataset` corresponding to *split*
    """
    from fairseq.data import FairseqDataset
    if split not in self.datasets:
        raise KeyError('Dataset not loaded: ' + split)
    if not isinstance(self.datasets[split], FairseqDataset):
        raise TypeError('Datasets are expected to be of type FairseqDataset')
    return self.datasets[split]
Developer: kakaobrain, Project: helo_word, Source: fairseq_task.py

Example 3: dataset

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def dataset(self, split):
    """Return a dataset split."""
    from fairseq.data import FairseqDataset  # added: needed for the isinstance check below

    if split not in self.datasets:
        raise KeyError('Dataset not loaded: ' + split)
    if not isinstance(self.datasets[split], FairseqDataset):
        raise TypeError('Datasets are expected to be of type FairseqDataset')
    return self.datasets[split]
Developer: nusnlp, Project: crosentgec, Source: fairseq_task.py

Example 4: train_step

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
import torch  # added: needed for the profiler record_function contexts below

def train_step(
    self, sample, model, criterion, optimizer, update_num, ignore_grad=False
):
    """
    Do forward and backward, and return the loss as computed by *criterion*
    for the given *model* and *sample*.

    Args:
        sample (dict): the mini-batch. The format is defined by the
            :class:`~fairseq.data.FairseqDataset`.
        model (~fairseq.models.BaseFairseqModel): the model
        criterion (~fairseq.criterions.FairseqCriterion): the criterion
        optimizer (~fairseq.optim.FairseqOptimizer): the optimizer
        update_num (int): the current update
        ignore_grad (bool): multiply loss by 0 if this is set to True

    Returns:
        tuple:
            - the loss
            - the sample size, which is used as the denominator for the
              gradient
            - logging outputs to display while training
    """
    model.train()
    model.set_num_updates(update_num)
    with torch.autograd.profiler.record_function("forward"):
        loss, sample_size, logging_output = criterion(model, sample)
    if ignore_grad:
        loss *= 0
    with torch.autograd.profiler.record_function("backward"):
        optimizer.backward(loss)
    return loss, sample_size, logging_output
Developer: pytorch, Project: fairseq, Source: fairseq_task.py
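For context, a hedged sketch of how a training loop might drive train_step; the surrounding objects (task, model, criterion, optimizer, epoch_itr) are assumed to exist, and fairseq's real Trainer adds gradient accumulation, clipping, and distributed logic on top:

# Illustrative only; not the actual fairseq Trainer.
for update_num, sample in enumerate(epoch_itr):
    optimizer.zero_grad()
    loss, sample_size, logging_output = task.train_step(
        sample, model, criterion, optimizer, update_num
    )
    optimizer.step()  # apply the gradients accumulated by optimizer.backward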

Example 5: dataset

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def dataset(self, split):
    """Return a dataset split."""
    from fairseq.data import FairseqDataset
    if split not in self.datasets:
        raise KeyError('Dataset not loaded: ' + split)
    if not isinstance(self.datasets[split], FairseqDataset):
        raise TypeError('Datasets are expected to be of type FairseqDataset')
    return self.datasets[split]
Developer: mlperf, Project: training_results_v0.5, Source: fairseq_task.py

Example 6: train_step

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def train_step(
    self, sample, model, criterion, optimizer, update_num, ignore_grad=False
):
    """
    Do forward and backward, and return the loss as computed by *criterion*
    for the given *model* and *sample*.

    Args:
        sample (dict): the mini-batch. The format is defined by the
            :class:`~fairseq.data.FairseqDataset`.
        model (~fairseq.models.BaseFairseqModel): the model
        criterion (~fairseq.criterions.FairseqCriterion): the criterion
        optimizer (~fairseq.optim.FairseqOptimizer): the optimizer
        update_num (int): the current update
        ignore_grad (bool): multiply loss by 0 if this is set to True

    Returns:
        tuple:
            - the loss
            - the sample size, which is used as the denominator for the
              gradient
            - logging outputs to display while training
    """
    model.train()
    model.set_num_updates(update_num)
    loss, sample_size, logging_output = criterion(model, sample)
    if ignore_grad:
        loss *= 0
    optimizer.backward(loss)
    return loss, sample_size, logging_output
Developer: elbayadm, Project: attn2d, Source: fairseq_task.py

Example 7: train_step

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
def train_step(self, sample, model, criterion, optimizer, ignore_grad=False):
    """
    Do forward and backward, and return the loss as computed by *criterion*
    for the given *model* and *sample*.

    Args:
        sample (dict): the mini-batch. The format is defined by the
            :class:`~fairseq.data.FairseqDataset`.
        model (~fairseq.models.BaseFairseqModel): the model
        criterion (~fairseq.criterions.FairseqCriterion): the criterion
        optimizer (~fairseq.optim.FairseqOptimizer): the optimizer
        ignore_grad (bool): multiply loss by 0 if this is set to True

    Returns:
        tuple:
            - the loss
            - the sample size, which is used as the denominator for the
              gradient
            - logging outputs to display while training
    """
    model.train()
    loss, sample_size, logging_output = criterion(model, sample)
    if ignore_grad:
        loss *= 0
    optimizer.backward(loss)
    return loss, sample_size, logging_output
Developer: kakaobrain, Project: helo_word, Source: fairseq_task.py

Example 8: __init__

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
import numpy as np

from fairseq.data import (  # added: types referenced in the signature and checks below
    BlockPairDataset,
    ConcatDataset,
    Dictionary,
    FairseqDataset,
    TokenBlockDataset,
)

def __init__(
    self,
    dataset: FairseqDataset,
    sizes: np.ndarray,
    vocab: Dictionary,
    pad_idx: int,
    mask_idx: int,
    classif_token_idx: int,
    sep_token_idx: int,
    seed: int = 1,
    shuffle: bool = True,
    has_pairs: bool = True,
    segment_id: int = 0,
    masking_ratio: float = 0.15,
    masking_prob: float = 0.8,
    random_token_prob: float = 0.1,
):
    # Make sure the input dataset is one of the supported types
    assert isinstance(
        dataset, (TokenBlockDataset, BlockPairDataset, ConcatDataset)
    ), (
        "MaskedLMDataset only wraps TokenBlockDataset or BlockPairDataset "
        "or ConcatDataset"
    )

    self.dataset = dataset
    self.sizes = np.array(sizes)
    self.vocab = vocab
    self.pad_idx = pad_idx
    self.mask_idx = mask_idx
    self.classif_token_idx = classif_token_idx
    self.sep_token_idx = sep_token_idx
    self.shuffle = shuffle
    self.seed = seed
    self.has_pairs = has_pairs
    self.segment_id = segment_id
    self.masking_ratio = masking_ratio
    self.masking_prob = masking_prob
    self.random_token_prob = random_token_prob

    # If we have only one block then sizes needs to be updated to include
    # the classification token
    if not has_pairs:
        self.sizes = self.sizes + 1
Developer: pytorch, Project: fairseq, Source: masked_lm_dataset.py
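The constructor defaults encode BERT-style masking: 15% of tokens are selected, and of those, 80% become the mask token, 10% become a random vocabulary token, and the remaining 10% stay unchanged. A hedged sketch of that per-token decision follows; the real MaskedLMDataset logic differs in detail, and mask_tokens plus every name below are illustrative:

import numpy as np

def mask_tokens(tokens, mask_idx, vocab_size, rng,
                masking_ratio=0.15, masking_prob=0.8, random_token_prob=0.1):
    """Return a masked copy of `tokens` and the chosen positions."""
    tokens = np.array(tokens)
    num_to_mask = int(round(len(tokens) * masking_ratio))
    positions = rng.choice(len(tokens), num_to_mask, replace=False)
    for pos in positions:
        r = rng.random()
        if r < masking_prob:                        # 80%: replace with <mask>
            tokens[pos] = mask_idx
        elif r < masking_prob + random_token_prob:  # 10%: random vocab token
            tokens[pos] = rng.integers(vocab_size)
        # else (remaining 10%): keep the original token
    return tokens, positions

masked, positions = mask_tokens(range(100), mask_idx=3, vocab_size=1000,
                                rng=np.random.default_rng(1))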

Example 9: get_batch_iterator

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
from fairseq.data import FairseqDataset, data_utils  # added: used below
# ptt_iterators is an iterator module defined inside the pytorch/translate
# project; its import is part of the surrounding file and is omitted here.

def get_batch_iterator(
    self,
    dataset,
    max_tokens=None,
    max_sentences=None,
    max_positions=None,
    ignore_invalid_inputs=False,
    required_batch_size_multiple=1,
    seed=1,
    num_shards=1,
    shard_id=0,
    num_workers=0,
):
    assert isinstance(dataset, FairseqDataset)

    # get indices ordered by example size
    with data_utils.numpy_seed(seed):
        indices = dataset.ordered_indices()

    # filter examples that are too large
    indices = data_utils.filter_by_size(
        indices,
        dataset,
        max_positions,
        raise_exception=(not ignore_invalid_inputs),
    )

    # create mini-batches with given size constraints
    batch_sampler = data_utils.batch_by_size(
        indices,
        dataset.num_tokens,
        max_tokens=max_tokens,
        max_sentences=max_sentences,
        required_batch_size_multiple=required_batch_size_multiple,
    )

    # return a reusable, sharded iterator that re-weights the sub-tasks
    return ptt_iterators.WeightedEpochBatchIterator(
        dataset=dataset,
        collate_fn=dataset.collater,
        batch_sampler=batch_sampler,
        seed=seed,
        num_shards=num_shards,
        shard_id=shard_id,
        num_workers=num_workers,
        weights=self.loss_weights,
    )
Developer: pytorch, Project: translate, Source: pytorch_translate_multi_task.py
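This method, like the next example, follows the same three-stage pipeline: order indices by example size, drop over-long examples, then pack indices into token-budgeted batches. A toy, purely conceptual sketch of the packing stage follows; fairseq's data_utils.batch_by_size enforces more constraints, and toy_batch_by_size is an assumed name:

def toy_batch_by_size(indices, num_tokens_fn, max_tokens):
    """Greedily pack length-sorted indices under a padded-token budget."""
    batches, cur = [], []
    for idx in indices:
        cand = cur + [idx]
        # After padding, a batch costs batch_size * longest_example tokens.
        cost = len(cand) * max(num_tokens_fn(i) for i in cand)
        if cur and cost > max_tokens:
            batches.append(cur)
            cur = [idx]
        else:
            cur = cand
    if cur:
        batches.append(cur)
    return batches

sizes = [3, 9, 4, 2, 8]
order = sorted(range(len(sizes)), key=sizes.__getitem__)
print(toy_batch_by_size(order, sizes.__getitem__, max_tokens=16))
# -> [[3, 0, 2], [4], [1]]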

Example 10: get_batch_iterator

# Required import: from fairseq import data [as alias]
# Or: from fairseq.data import FairseqDataset [as alias]
from fairseq.data import FairseqDataset, data_utils, iterators  # added: used below

def get_batch_iterator(
    self, dataset, max_tokens=None, max_sentences=None, max_positions=None,
    ignore_invalid_inputs=False, required_batch_size_multiple=1,
    seed=1, num_shards=1, shard_id=0, num_workers=0,
):
    """
    Get an iterator that yields batches of data from the given dataset.

    Args:
        dataset (~fairseq.data.FairseqDataset): dataset to batch
        max_tokens (int, optional): max number of tokens in each batch
            (default: None).
        max_sentences (int, optional): max number of sentences in each
            batch (default: None).
        max_positions (optional): max sentence length supported by the
            model (default: None).
        ignore_invalid_inputs (bool, optional): don't raise Exception for
            sentences that are too long (default: False).
        required_batch_size_multiple (int, optional): require batch size to
            be a multiple of N (default: 1).
        seed (int, optional): seed for random number generator for
            reproducibility (default: 1).
        num_shards (int, optional): shard the data iterator into N
            shards (default: 1).
        shard_id (int, optional): which shard of the data iterator to
            return (default: 0).
        num_workers (int, optional): how many subprocesses to use for data
            loading. 0 means the data will be loaded in the main process
            (default: 0).

    Returns:
        ~fairseq.iterators.EpochBatchIterator: a batched iterator over the
            given dataset split
    """
    assert isinstance(dataset, FairseqDataset)

    # get indices ordered by example size
    with data_utils.numpy_seed(seed):
        indices = dataset.ordered_indices()

    # filter examples that are too large
    indices = data_utils.filter_by_size(
        indices, dataset.size, max_positions,
        raise_exception=(not ignore_invalid_inputs),
    )

    # create mini-batches with given size constraints
    batch_sampler = data_utils.batch_by_size(
        indices, dataset.num_tokens, max_tokens=max_tokens,
        max_sentences=max_sentences,
        required_batch_size_multiple=required_batch_size_multiple,
    )

    # return a reusable, sharded iterator
    return iterators.EpochBatchIterator(
        dataset=dataset,
        collate_fn=dataset.collater,
        batch_sampler=batch_sampler,
        seed=seed,
        num_shards=num_shards,
        shard_id=shard_id,
        num_workers=num_workers,
    )
Developer: kakaobrain, Project: helo_word, Source: fairseq_task.py
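A hedged usage sketch of the returned iterator (assumes a task with a loaded 'train' split; the max_tokens budget is arbitrary):

itr = task.get_batch_iterator(
    dataset=task.dataset('train'),
    max_tokens=4096,
)
# The iterator is reusable across epochs; next_epoch_itr starts a new one.
for sample in itr.next_epoch_itr(shuffle=True):
    pass  # run forward/backward on `sample` here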


Note: The fairseq.data.FairseqDataset examples in this article were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets were contributed by their original authors, who retain copyright; consult each project's license before redistributing or reusing the code. Do not reproduce without permission.