This article collects typical usage examples of the Python method pynvml.nvmlDeviceGetHandleByIndex. If you have been wondering what pynvml.nvmlDeviceGetHandleByIndex does and how to use it, the curated code samples below may help. You can also explore further usage examples from pynvml, the module this method belongs to.
The following 15 code examples of pynvml.nvmlDeviceGetHandleByIndex are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
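Before diving into the examples: nvmlDeviceGetHandleByIndex takes a zero-based device index and returns an opaque handle that every other per-device NVML query accepts. A minimal sketch of the call pattern the examples below all share (assuming a machine with at least one NVIDIA GPU and a working driver):

import pynvml

pynvml.nvmlInit()                              # must precede any other NVML call
handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # zero-based GPU index
print(pynvml.nvmlDeviceGetName(handle))        # device name (bytes on older pynvml)
print(pynvml.nvmlDeviceGetMemoryInfo(handle).used)  # memory in use, in bytes
pynvml.nvmlShutdown()                          # release NVML when done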
Example 1: getFreeId
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def getFreeId():
    import pynvml

    pynvml.nvmlInit()

    def getFreeRatio(id):
        handle = pynvml.nvmlDeviceGetHandleByIndex(id)
        use = pynvml.nvmlDeviceGetUtilizationRates(handle)
        # Average of the GPU and memory utilization percentages.
        ratio = 0.5 * (float(use.gpu) + float(use.memory))
        return ratio

    deviceCount = pynvml.nvmlDeviceGetCount()
    available = []
    for i in range(deviceCount):
        if getFreeRatio(i) < 70:
            available.append(i)
    gpus = ''
    for g in available:
        gpus = gpus + str(g) + ','
    gpus = gpus[:-1]
    return gpus
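The returned string is formatted for CUDA's device-selection environment variable. A hedged usage sketch (the environment-variable step is standard CUDA practice, not part of the original snippet):

import os

# Restrict CUDA frameworks such as PyTorch or TensorFlow to the idle GPUs.
os.environ['CUDA_VISIBLE_DEVICES'] = getFreeId()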
Example 2: getGPUUsage
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def getGPUUsage():
    try:
        pynvml.nvmlInit()
        count = pynvml.nvmlDeviceGetCount()
        if count == 0:
            return None
        result = {"driver": pynvml.nvmlSystemGetDriverVersion(),
                  "gpu_count": int(count)}
        gpuData = []
        for i in range(count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
            # Memory figures are reported in bytes; convert to GB.
            gpuData.append({"device_num": i,
                            "name": pynvml.nvmlDeviceGetName(handle),
                            "total": round(float(mem.total) / 1000000000, 2),
                            "used": round(float(mem.used) / 1000000000, 2)})
        result["devices"] = jsonpickle.encode(gpuData, unpicklable=False)
    except Exception:
        result = {"driver": "No GPU!", "gpu_count": 0, "devices": []}
    return result
Example 3: gpu_info
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def gpu_info(self):
    # pip install nvidia-ml-py3
    # Only query when GPUs are configured and CUDA is available.
    if self.gpu_ids and torch.cuda.is_available():
        try:
            import pynvml
            pynvml.nvmlInit()
            self.config_dic['gpu_driver_version'] = pynvml.nvmlSystemGetDriverVersion()
            for gpu_id in self.gpu_ids:
                handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
                gpu_id_name = "gpu%s" % gpu_id
                mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                gpu_utilize = pynvml.nvmlDeviceGetUtilizationRates(handle)
                self.config_dic['%s_device_name' % gpu_id_name] = pynvml.nvmlDeviceGetName(handle)
                # Memory figures are reported in bytes; convert to GiB.
                self.config_dic['%s_mem_total' % gpu_id_name] = gpu_mem_total = round(mem_info.total / 1024 ** 3, 2)
                self.config_dic['%s_mem_used' % gpu_id_name] = gpu_mem_used = round(mem_info.used / 1024 ** 3, 2)
                # self.config_dic['%s_mem_free' % gpu_id_name] = gpu_mem_free = mem_info.free // 1024 ** 2
                self.config_dic['%s_mem_percent' % gpu_id_name] = round((gpu_mem_used / gpu_mem_total) * 100, 1)
                self._set_dict_smooth('%s_utilize_gpu' % gpu_id_name, gpu_utilize.gpu, 0.8)
                # self.config_dic['%s_utilize_gpu' % gpu_id_name] = gpu_utilize.gpu
                # self.config_dic['%s_utilize_memory' % gpu_id_name] = gpu_utilize.memory
            pynvml.nvmlShutdown()
        except Exception as e:
            print(e)
Example 4: getFreeId
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def getFreeId():
    import pynvml

    pynvml.nvmlInit()

    def getFreeRatio(id):
        handle = pynvml.nvmlDeviceGetHandleByIndex(id)
        use = pynvml.nvmlDeviceGetUtilizationRates(handle)
        # Average of the GPU and memory utilization percentages.
        ratio = 0.5 * (float(use.gpu) + float(use.memory))
        return ratio

    deviceCount = pynvml.nvmlDeviceGetCount()
    available = []
    for i in range(deviceCount):
        if getFreeRatio(i) < 70:
            available.append(i)
    gpus = ''
    for g in available:
        gpus = gpus + str(g) + ','
    gpus = gpus[:-1]
    return gpus
Example 5: __init__
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def __init__(self, id=0):
    """Create object to control device using NVML"""
    pynvml.nvmlInit()
    self.dev = pynvml.nvmlDeviceGetHandleByIndex(id)

    try:
        self._pwr_limit = pynvml.nvmlDeviceGetPowerManagementLimit(self.dev)
        self.pwr_constraints = pynvml.nvmlDeviceGetPowerManagementLimitConstraints(self.dev)
    except pynvml.NVMLError_NotSupported:
        self._pwr_limit = None
        self.pwr_constraints = [1, 0]  # inverted range to make all range checks fail

    try:
        self._persistence_mode = pynvml.nvmlDeviceGetPersistenceMode(self.dev)
    except pynvml.NVMLError_NotSupported:
        self._persistence_mode = None

    try:
        self._auto_boost = pynvml.nvmlDeviceGetAutoBoostedClocksEnabled(self.dev)[0]  # returns [isEnabled, isDefaultEnabled]
    except pynvml.NVMLError_NotSupported:
        self._auto_boost = None

    try:
        self.gr_clock_default = pynvml.nvmlDeviceGetDefaultApplicationsClock(self.dev, pynvml.NVML_CLOCK_GRAPHICS)
        self.sm_clock_default = pynvml.nvmlDeviceGetDefaultApplicationsClock(self.dev, pynvml.NVML_CLOCK_SM)
        self.mem_clock_default = pynvml.nvmlDeviceGetDefaultApplicationsClock(self.dev, pynvml.NVML_CLOCK_MEM)
        self.supported_mem_clocks = pynvml.nvmlDeviceGetSupportedMemoryClocks(self.dev)

        # Gather the supported graphics clocks for each supported memory clock into a dict.
        self.supported_gr_clocks = dict()
        for mem_clock in self.supported_mem_clocks:
            supported_gr_clocks = pynvml.nvmlDeviceGetSupportedGraphicsClocks(self.dev, mem_clock)
            self.supported_gr_clocks[mem_clock] = supported_gr_clocks
    except pynvml.NVMLError_NotSupported:
        self.gr_clock_default = None
        self.sm_clock_default = None
        self.mem_clock_default = None
        self.supported_mem_clocks = []
        self.supported_gr_clocks = dict()
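The clock tables gathered above are typically used to pick a valid (memory clock, graphics clock) pair before applying it with nvmlDeviceSetApplicationsClocks. A hedged sketch of that follow-up step (set_max_clocks is a hypothetical helper, not part of the original snippet, and setting application clocks usually requires root privileges):

def set_max_clocks(dev, supported_mem_clocks, supported_gr_clocks):
    # Choose the highest supported memory clock, then the highest
    # graphics clock that is valid in combination with it.
    mem_clock = max(supported_mem_clocks)
    gr_clock = max(supported_gr_clocks[mem_clock])
    pynvml.nvmlDeviceSetApplicationsClocks(dev, mem_clock, gr_clock)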
Example 6: get_appropriate_cuda
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def get_appropriate_cuda(task_scale='s'):
    if task_scale not in {'s', 'm', 'l'}:
        logger.info('task scale wrong!')
        exit(2)

    import pynvml
    pynvml.nvmlInit()

    # First pass: return the first GPU that is essentially idle.
    total_cuda_num = pynvml.nvmlDeviceGetCount()
    for i in range(total_cuda_num):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)  # the loop index is the GPU id
        memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        utilizationInfo = pynvml.nvmlDeviceGetUtilizationRates(handle)
        logger.info('gpu %s mem: %s util: %s', i, memInfo.used / memInfo.total, utilizationInfo.gpu)
        # utilization.gpu is a percentage (0-100), so compare against 20, not 0.2.
        if memInfo.used / memInfo.total < 0.15 and utilizationInfo.gpu < 20:
            logger.info('%s %s', i, memInfo.used / memInfo.total)
            return 'cuda:' + str(i)

    # Second pass: fall back to the device with the most free memory,
    # requiring a task-scale-dependent minimum in MiB.
    if task_scale == 's':
        max_memory = 2000
    elif task_scale == 'm':
        max_memory = 6000
    else:
        max_memory = 9000

    max_id = -1
    for i in range(total_cuda_num):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)  # the loop index is the GPU id
        memInfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        free_mb = memInfo.free / 1024 ** 2  # memInfo.free is in bytes
        if max_memory < free_mb:
            max_memory = free_mb
            max_id = i

    if max_id == -1:
        logger.info('no appropriate gpu, wait!')
        exit(2)
    return 'cuda:' + str(max_id)
    # if memInfo.used / memInfo.total < 0.5:
    #     return
Example 7: device_name
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def device_name():
    with pynvml_context():
        device_name = device_name_for(pynvml.nvmlDeviceGetHandleByIndex(0))
        return device_name
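pynvml_context and device_name_for are helpers defined elsewhere in the source repository. A plausible sketch of what they do, assuming the context manager simply pairs nvmlInit with nvmlShutdown (an assumption, not the repository's actual code):

from contextlib import contextmanager

@contextmanager
def pynvml_context():
    # Initialize NVML on entry; shut it down on exit even if an error occurs.
    pynvml.nvmlInit()
    try:
        yield
    finally:
        pynvml.nvmlShutdown()

def device_name_for(handle):
    name = pynvml.nvmlDeviceGetName(handle)
    # Older pynvml versions return bytes; newer ones return str.
    return name.decode('utf-8') if isinstance(name, bytes) else name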
Example 8: aggregate_measurements
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def aggregate_measurements(device_count):
    measures_for_device = compose(measurements_for,
                                  pynvml.nvmlDeviceGetHandleByIndex)
    return {i: measures_for_device(i) for i in range(device_count)}
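compose and measurements_for come from the surrounding repository; compose is presumably a right-to-left function-composition helper in the style of toolz.compose, so each index is first turned into a handle and then into measurements. A minimal sketch under that assumption:

def compose(f, g):
    # Right-to-left composition: compose(f, g)(x) == f(g(x)).
    # Here: device index -> handle -> measurements.
    return lambda x: f(g(x))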
Example 9: _get_handles
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def _get_handles(self):
    """ Obtain the internal handle identifiers for the system GPUs and allocate to
    :attr:`_handles`. """
    if self._is_plaidml:
        self._handles = self._plaid.devices
    elif IS_MACOS:
        self._handles = pynvx.cudaDeviceGetHandles(ignore=True)
    else:
        self._handles = [pynvml.nvmlDeviceGetHandleByIndex(i)
                         for i in range(self._device_count)]
    self._log("debug", "GPU Handles found: {}".format(len(self._handles)))
Example 10: get_gpu_metrics
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def get_gpu_metrics() -> List:
    try:
        pynvml.nvmlInit()
        device_count = pynvml.nvmlDeviceGetCount()
        results = []
        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            results += metrics_dict_to_list(query_gpu(handle))
        return results
    except pynvml.NVMLError:
        logger.debug("Failed to collect gpu resources", exc_info=True)
        return []
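query_gpu and metrics_dict_to_list are helpers from the surrounding repository and are not shown here. A hypothetical sketch of what they might look like, using only standard pynvml calls (the metric names are an assumption):

def query_gpu(handle):
    # Hypothetical helper: collect a few metrics for one device.
    mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
    util = pynvml.nvmlDeviceGetUtilizationRates(handle)
    return {"memory_used": mem.used,
            "memory_total": mem.total,
            "utilization_gpu": util.gpu}

def metrics_dict_to_list(metrics):
    # Hypothetical helper: flatten the dict into (name, value) pairs.
    return list(metrics.items())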
Example 11: _find_gpu
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def _find_gpu(self):
    device_count = pynvml.nvmlDeviceGetCount()
    for i in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        gpu_processes = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
        for gpu_process in gpu_processes:
            if gpu_process.pid == self.pid:
                self.gpu = handle
                self.accounting_enabled = pynvml.nvmlDeviceGetAccountingMode(self.gpu) == pynvml.NVML_FEATURE_ENABLED
                # Clearing accounting statistics requires root privileges:
                # pynvml.nvmlDeviceSetAccountingMode(self.gpu, pynvml.NVML_FEATURE_DISABLED)
                # pynvml.nvmlDeviceSetAccountingMode(self.gpu, pynvml.NVML_FEATURE_ENABLED)
Example 12: _update_statistics
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def _update_statistics(self, elapsed_steps, elapsed_time, global_step):
    """Collect and store all summary values.

    Arguments:
        elapsed_steps (int):
            The number of steps between the current trigger event and the last one.
        elapsed_time (float):
            The number of seconds between the current trigger event and the last one.
        global_step (tf.Tensor):
            Global step tensor.
    """
    # Iterate the available GPUs.
    for gpu_id in range(self._device_count):
        summaries = dict()
        # Acquire a GPU device handle.
        handle = nvml.nvmlDeviceGetHandleByIndex(gpu_id)
        # Query information on the GPU's memory usage.
        summaries.update(self.__query_mem(handle))
        # Query information on the GPU's utilization.
        summaries.update(self.__query_util(handle))
        # Update the value history for the current GPU, truncating it
        # to the most recent entries before appending the new value.
        for k in summaries.keys():
            if k in self._statistics_to_log:
                self._gpu_statistics[gpu_id][k] = \
                    self._gpu_statistics[gpu_id][k][-self._average_n:] + [summaries[k]]
Example 13: _crawl_in_system
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def _crawl_in_system(self):
    '''
    nvidia-smi returns the following groups: MEMORY, UTILIZATION, ECC,
    TEMPERATURE, POWER, CLOCK, COMPUTE, PIDS, PERFORMANCE,
    SUPPORTED_CLOCKS, PAGE_RETIREMENT, ACCOUNTING.
    Currently, the following are requested based on dlaas requirements:
    utilization.gpu, utilization.memory,
    memory.total, memory.free, memory.used
    nvidia-smi --query-gpu=utilization.gpu,utilization.memory,\
        memory.total,memory.free,memory.used --format=csv,noheader,nounits
    '''
    if self._init_nvml() == -1:
        return
    self.inspect_arr = exec_dockerps()
    num_gpus = pynvml.nvmlDeviceGetCount()
    for gpuid in range(num_gpus):
        gpuhandle = pynvml.nvmlDeviceGetHandleByIndex(gpuid)
        temperature = pynvml.nvmlDeviceGetTemperature(
            gpuhandle, pynvml.NVML_TEMPERATURE_GPU)
        memory = pynvml.nvmlDeviceGetMemoryInfo(gpuhandle)
        # Memory figures are reported in bytes; convert to MiB.
        mem_total = memory.total / 1024 / 1024
        mem_used = memory.used / 1024 / 1024
        mem_free = memory.free / 1024 / 1024
        # Power figures are reported in milliwatts; convert to watts.
        power_draw = pynvml.nvmlDeviceGetPowerUsage(gpuhandle) / 1000
        power_limit = pynvml.nvmlDeviceGetEnforcedPowerLimit(
            gpuhandle) / 1000
        util = pynvml.nvmlDeviceGetUtilizationRates(gpuhandle)
        util_gpu = util.gpu
        util_mem = util.memory
        entry = {
            'utilization': {'gpu': util_gpu, 'memory': util_mem},
            'memory': {'total': mem_total, 'free': mem_free,
                       'used': mem_used},
            'temperature': temperature,
            'power': {'draw': power_draw, 'limit': power_limit}
        }
        key = self._get_feature_key(gpuhandle, gpuid)
        if gpuid == num_gpus - 1:
            self._shutdown_nvml()
        yield (key, entry, 'gpu')
Example 14: measure_cpu_gpu_instant_load
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def measure_cpu_gpu_instant_load():
    # Get current cpu gpu load, as
    #   load = [rank, cpu_load, nvidia_device_id, gpu_load]
    #   result_arr: [load, load, ...]
    if cg_load_backend_ok:
        global gpu_a_load
        global gpu_m_count
        global p_handler

        cpu_load = p_handler.cpu_percent()
        gpu_m_count += 1
        try:
            comm = current_communicator()
            if comm:
                index = comm.local_rank
            elif 'cuda' in str(nn.get_current_context().backend):
                index = 0
            else:
                raise Exception
            handler = pynvml.nvmlDeviceGetHandleByIndex(index)
            gpu_load = [
                [index, pynvml.nvmlDeviceGetUtilizationRates(handler).gpu]]
            if index in gpu_a_load.keys():
                gpu_a_load[index]['name'] = pynvml.nvmlDeviceGetName(
                    handler).decode("utf-8")
                o_load = gpu_a_load[index]['load']
                n_load = gpu_load[0][1]
                # Running average of the GPU load over all measurements so far.
                gpu_a_load[index]['load'] = (
                    (gpu_m_count - 1) * o_load + n_load) / gpu_m_count
            else:
                gpu_a_load[index] = {
                    'name': pynvml.nvmlDeviceGetName(handler).decode("utf-8"),
                    'load': gpu_load[0][1]
                }
        except Exception:
            gpu_load = [[-1, -1]]
        callback.update_status(
            ('cpu_gpu_load', collect_and_shape_result(cpu_load, gpu_load)))
Example 15: _log_statistics
# Required import: import pynvml [as alias]
# Or: from pynvml import nvmlDeviceGetHandleByIndex [as alias]
def _log_statistics(self, elapsed_steps, elapsed_time, global_step):
    """Collect and store all summary values.

    Arguments:
        elapsed_steps (int):
            The number of steps between the current trigger event and the last one.
        elapsed_time (float):
            The number of seconds between the current trigger event and the last one.
        global_step (tf.Tensor):
            Global step tensor.
    """
    # Write summary for tensorboard.
    if self._summary_writer is not None:
        summary_list = list()
        # Add only summaries.
        for gpu_id in self._gpu_statistics.keys():
            for statistic in self._gpu_statistics[gpu_id].keys():
                # Only add them if they are requested for logging.
                if statistic in self._statistics_to_log:
                    values = self._gpu_statistics[gpu_id][statistic]
                    # Only calculate and write the average if there is data available.
                    if values:
                        avg_value = sum(values) / len(values)
                        avg_summary = Summary.Value(tag='{}/{}:{}'
                                                    .format(self._group_tag, gpu_id, statistic),
                                                    simple_value=avg_value)
                        summary_list.append(avg_summary)
        # Write all statistics as simple scalar summaries.
        summary = Summary(value=summary_list)
        self._summary_writer.add_summary(summary, global_step)

    # Log summaries to the logging stream.
    if not self._suppress_stdout:
        for gpu_id in self._gpu_statistics.keys():
            # Acquire a GPU device handle.
            handle = nvml.nvmlDeviceGetHandleByIndex(gpu_id)
            # Query the device name.
            name = nvml.nvmlDeviceGetName(handle).decode('utf-8')
            for statistic in self._gpu_statistics[gpu_id].keys():
                # Log utilization information at DEBUG level.
                logging.debug("%s: %s", name, '{}: {}'
                              .format(statistic, self._gpu_statistics[gpu_id][statistic]))

# The following code has been inspired by <https://stackoverflow.com/a/45681782>: