This article collects typical usage examples of the Python method torch.cuda.device_count. If you are unsure how cuda.device_count works or how to call it, the curated examples below should help; you can also look further into the containing module, torch.cuda.
The sections below present 10 code examples of cuda.device_count, drawn from open-source projects and ordered by popularity.
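Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing the most common pattern: query device_count and fall back to the CPU when no GPU is visible.

import torch
from torch import cuda

# device_count() returns 0 on CPU-only machines, so this picks "cuda:0"
# only when at least one GPU is visible to PyTorch.
num_gpus = cuda.device_count()
device = torch.device("cuda:0" if num_gpus > 0 else "cpu")
print(f"{num_gpus} GPU(s) available, using {device}")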
Example 1: _get_stream
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
_streams = None  # module-level, lazily initialized per-device stream cache

def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device]
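A hypothetical call site for _get_stream might look like the sketch below (not from the original project); it uses the returned background stream for an asynchronous host-to-device copy. Real scatter code additionally records CUDA events to synchronize the copy stream with the compute stream.

def copy_to_gpu_async(tensor, device_id):
    # device_id == -1 means "stay on CPU", mirroring the helper above.
    stream = _get_stream(device_id)
    if stream is None:
        return tensor
    # Run the copy on the background stream so it can overlap with compute.
    with cuda.stream(stream):
        return tensor.pin_memory().to(f"cuda:{device_id}", non_blocking=True)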
Example 2: __init__
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def __init__(
    self,
    serialization_dir: str,
    cuda_device: Optional[Union[int, torch.device]] = None,
    distributed: bool = False,
    local_rank: int = 0,
    world_size: int = 1,
) -> None:
    if cuda_device is None:
        from torch import cuda

        if cuda.device_count() > 0:
            cuda_device = 0
        else:
            cuda_device = -1

    check_for_gpu(cuda_device)
    self._serialization_dir = serialization_dir

    if isinstance(cuda_device, list):
        raise ConfigurationError(
            "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
            "Instead, we use torch's DistributedDataParallel at the command level, meaning "
            "our Trainer always uses a single GPU per process."
        )

    if distributed and world_size <= 1:
        raise ConfigurationError(
            "Distributed training can be performed only with more than 1 device. Check "
            "`cuda_device` key in the experiment configuration."
        )

    self.cuda_device = int_to_device(cuda_device)

    self._distributed = distributed
    self._rank = local_rank
    self._master = self._rank == 0
    self._world_size = world_size
Example 3: check_for_gpu
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def check_for_gpu(device: Union[int, torch.device, List[Union[int, torch.device]]]):
    if isinstance(device, list):
        for did in device:
            check_for_gpu(did)
    elif device is None:
        return
    else:
        from allennlp.common.util import int_to_device

        device = int_to_device(device)
        if device != torch.device("cpu"):
            num_devices_available = cuda.device_count()
            if num_devices_available == 0:
                # Torch will give a more informative exception than ours, so we want to include
                # that context as well if it's available. For example, if you try to run torch 1.5
                # on a machine with CUDA10.1 you'll get the following:
                #
                #     The NVIDIA driver on your system is too old (found version 10010).
                #
                torch_gpu_error = ""
                try:
                    cuda._check_driver()
                except Exception as e:
                    torch_gpu_error = "\n{0}".format(e)

                raise ConfigurationError(
                    "Experiment specified a GPU but none is available;"
                    " if you want to run on CPU use the override"
                    " 'trainer.cuda_device=-1' in the json config file." + torch_gpu_error
                )
            elif device.index >= num_devices_available:
                raise ConfigurationError(
                    f"Experiment specified GPU device {device.index}"
                    f" but there are only {num_devices_available} devices "
                    f" available."
                )
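A hedged illustration of how a check like this might be invoked before building a trainer; ConfigurationError is assumed to be allennlp.common.checks.ConfigurationError, and the fallback-to-CPU behaviour is purely illustrative.

requested_device = 0  # e.g. the value of "trainer.cuda_device" from the config
try:
    check_for_gpu(requested_device)
except ConfigurationError as err:
    print(f"No usable GPU, falling back to CPU: {err}")
    requested_device = -1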
Example 4: check_for_gpu
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def check_for_gpu(device_id):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError(u"Experiment specified a GPU but none is available;"
                                 u" if you want to run on CPU use the override"
                                 u" 'trainer.cuda_device=-1' in the json config file.")
Example 5: check_for_gpu
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def check_for_gpu(params) -> object:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example 6: check_for_gpu
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def check_for_gpu(device_id: int):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example 7: __init__
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def __init__(self, module,
             device_ids=None, output_device=None, dim=0,
             allow_replication_callback=True,
             user_scattered=False, use_scatter_stream=True,
             scatter_func=None,
             use_dict_gather=True, dict_gather_layout=None,
             persistent=False, copy_parameters=False, copy_buffers=True):
    super(DataParallel, self).__init__()

    if device_ids is None:
        device_ids = list(range(cuda.device_count()))
    if output_device is None:
        output_device = device_ids[0]

    self.dim = dim
    self.module = module
    self.device_ids = device_ids
    self.output_device = output_device

    if len(self.device_ids) == 1:
        self.module.cuda(device_ids[0])

    self.allow_replication_callback = allow_replication_callback

    self.user_scattered = user_scattered
    self.use_scatter_stream = use_scatter_stream
    self.scatter_func = scatter_func

    self.use_dict_gather = use_dict_gather
    self.dict_gather_layout = dict_gather_layout

    self.persistent = persistent
    self.copy_parameters = copy_parameters
    self.copy_buffers = copy_buffers

    self.replicas = nn.ModuleList()
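A hypothetical way to construct this customized DataParallel wrapper, assuming `from torch import nn, cuda` at module level. Whether forward() accepts plain tensors or pre-scattered inputs depends on the project's user_scattered / scatter_func settings, so only the construction is sketched here.

base_model = nn.Sequential(nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 10))
if cuda.device_count() > 1:
    # Replicate across all visible GPUs; outputs are gathered on device_ids[0].
    parallel_model = DataParallel(base_model, device_ids=list(range(cuda.device_count())))
else:
    parallel_model = base_model  # single GPU or CPU: use the unwrapped module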
Example 8: get_num_nodes
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def get_num_nodes() -> int:
    # assume all nodes have the same number of gpus
    if not is_distributed():
        return 1
    else:
        return get_world_size() // device_count()
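For example, under the (hypothetical) assumption of 16 processes in total and 8 GPUs per node, the helper above would report 2 nodes:

world_size = 16      # hypothetical total number of processes
gpus_per_node = 8    # hypothetical device_count() on each node
assert world_size // gpus_per_node == 2  # get_num_nodes() would return 2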
Example 9: get_info
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def get_info():
    """
    Get gpu info.
    :return: <dict> gpu info
    """
    return {
        "has_cuda": cuda.is_available(),
        "devices": [] if not cuda.is_available()
                   else [cuda.get_device_name(i) for i in range(cuda.device_count())],
    }
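Calling the helper on a machine with, say, two GPUs might produce a result like the following; the device names are purely illustrative.

info = get_info()
print(info)
# Illustrative result on a two-GPU machine:
#   {'has_cuda': True, 'devices': ['GeForce RTX 2080 Ti', 'GeForce RTX 2080 Ti']}
# and on a CPU-only machine:
#   {'has_cuda': False, 'devices': []}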
Example 10: init_distributed
# Required import: from torch import cuda [as alias]
# Or: from torch.cuda import device_count [as alias]
def init_distributed(use_horovod: bool = False,
                     backend: Optional[str] = None,
                     init_method: Optional[str] = None,
                     warning: bool = True):
    """ Simple initializer for distributed training.

    :param use_horovod: use horovod instead of torch.distributed as the backend
    :param backend: backend passed to torch.distributed.init_process_group (defaults to "nccl")
    :param init_method: init_method passed to torch.distributed.init_process_group (defaults to "env://")
    :param warning: if True, warn when torch.distributed is already initialized
    :return:
    """
    # `is_distributed_available`, `is_distributed`, `is_horovod_available`,
    # `is_master`, `logger`, `_DISTRIBUTED_FLAG` and `args` are defined
    # elsewhere in the surrounding module.
    if not is_distributed_available():
        raise RuntimeError('Distributed training is not available on this machine')

    global _DISTRIBUTED_FLAG
    _DISTRIBUTED_FLAG = True

    if use_horovod:
        if backend is not None or init_method is not None:
            raise RuntimeError('Try to use horovod, but `backend` and `init_method` are not None')

        if is_horovod_available():
            import horovod.torch as hvd

            hvd.init()
            logger.debug("init horovod")
        else:
            raise RuntimeError('horovod is not available!')

    else:
        if backend is None:
            backend = "nccl"
        if init_method is None:
            init_method = "env://"

        if not is_distributed():
            raise RuntimeError(
                f"For distributed training, use `python -m torch.distributed.launch "
                f"--nproc_per_node={device_count()} {args}` ...")

        if distributed.is_initialized():
            if warning:
                logger.warn("`distributed` is already initialized. Skipped.")
        else:
            distributed.init_process_group(backend=backend, init_method=init_method)
        logger.debug("init distributed")

    if not is_master():
        # Silence print() on non-master processes.
        def no_print(*values, **kwargs):
            pass

        builtins.print = no_print
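A hedged sketch of how a training script might call this initializer when launched with torch.distributed.launch; the script name and the training steps are hypothetical.

# Launch, one process per GPU on the node (the --nproc_per_node value is what
# device_count() reports there):
#   python -m torch.distributed.launch --nproc_per_node=4 train.py
#
# Inside the hypothetical train.py:
def main():
    init_distributed(backend="nccl", init_method="env://")
    # ... build the model, wrap it in DistributedDataParallel, and train ...

if __name__ == "__main__":
    main()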