本文整理匯總了Python中nervanagpu.NervanaGPU.empty方法的典型用法代碼示例。如果您正苦於以下問題:Python NervanaGPU.empty方法的具體用法?Python NervanaGPU.empty怎麼用?Python NervanaGPU.empty使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類nervanagpu.NervanaGPU的用法示例。
在下文中一共展示了NervanaGPU.empty方法的10個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: GPU
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
class GPU(Backend):
"""
Sets up a NervanaGPU based backend for matrix operations.
Note that some functions defined in the generic Backend class such as are
cross-map pooling and normalization and adaDelta are not implemented for
this backend.
"""
default_dtype = np.float32
def __init__(self, rng_seed, stochastic_round=False, device_id=0):
    """
    Create the NervanaGPU handle and seed the random number generator.

    Arguments:
        rng_seed: seed stored on the instance and consumed by rng_init().
        stochastic_round: forwarded unchanged to the NervanaGPU
                          constructor and echoed to the log.
        device_id: stored as self.device_id; None is treated as 0.
    """
    self.ng = NervanaGPU(stochastic_round=stochastic_round)
    logger.info("Initialized NervanaGPU with stochastic_round=%s",
                stochastic_round)
    # rng_init() looks the seed up on the instance, so store it first.
    self.rng_seed = rng_seed
    self.rng_init()
    if device_id is None:
        device_id = 0
    self.device_id = device_id
def __getstate__(self):
"""
Defines what and how we go about serializing an instance of this class.
Returns:
self.__dict__: The full contents of the backend class instance,
except for the mem_pool which is on device and
cannot be serialized.
"""
if hasattr(self, 'mem_pool') and self.mem_pool is not None:
self.mem_pool_pickle = {'shape': self.mem_pool.shape,
'dtype': np.float32}
self.mem_pool = None
return self.__dict__
def __setstate__(self, state):
"""
Defines how we go about deserializing into an instance of this class.
Arguments:
self.__dict__: The full contents of the backend class instance,
except for the mem_pool which is on device and
cannot be serialized.
"""
self.__dict__.update(state)
self.mem_pool = self.ng.empty(self.mem_pool_pickle['shape'],
dtype=self.mem_pool_pickle['dtype'])
def init_mempool(self, shape, dtype=default_dtype):
"""
Allocates a memory pool for temporary storage
"""
self.mem_pool = self.ng.empty(shape, dtype=dtype)
def alloc_host_mem(self, shape, dtype):
    """
    Allocate a host buffer through drv.pagelocked_empty.

    Arguments:
        shape: shape of the buffer.
        dtype: element type of the buffer.

    Returns:
        The array returned by drv.pagelocked_empty, requested in C order
        with mem_flags=0.
    """
    host_buf = drv.pagelocked_empty(shape, dtype, order="C", mem_flags=0)
    return host_buf
def create_stream(self):
    """
    Create a new driver stream.

    Returns:
        A freshly constructed drv.Stream.
    """
    stream = drv.Stream()
    return stream
def async_copy(self, dest, src, stream=None):
    """
    Issue a host-to-device copy through drv.memcpy_htod_async.

    Arguments:
        dest: destination object; its gpudata attribute is the target of
              the copy.
        src: source buffer handed to the driver call unchanged.
        stream: stream handed to the driver call unchanged (None by
                default).
    """
    target = dest.gpudata
    drv.memcpy_htod_async(target, src, stream)
def rng_init(self):
    """
    Initialize and seed numpy's pseudo random number generator.

    The seed is taken from the instance's rng_seed attribute when one has
    been set on the instance; otherwise None is used. The chosen seed is
    logged before seeding.
    """
    # Only a seed stored in the instance dictionary counts.
    seed = self.rng_seed if 'rng_seed' in self.__dict__ else None
    logger.info("Seeding random number generator with: %s", str(seed))
    np.random.seed(seed)
def flop_timing_init(self, decorate_fc, decorate_conv, decorate_ew):
    """
    Set up FLOP timing by wrapping the named MOP calls with a decorator
    that records elapsed time and number of operations.

    Arguments:
        decorate_fc (list): names of the fully connected layer
                            forward/backward/update calls to time.
        decorate_conv (list): names of the convolutional layer
                              forward/backward/update calls to time.
        decorate_ew (list): names of the element-wise calls to time.

    Notes:
        Must be called before the first call to the timer-start method
        (spelled flop_timinig_start in this class).
    """
    # Start/end events, created in that order.
    self.start, self.end = drv.Event(), drv.Event()
    timer = FlopsDecorator(self)
    # Publish the timer on the instance before it starts decorating.
    self.flop_timer = timer
    timer.decorate(decorate_fc=decorate_fc,
                   decorate_conv=decorate_conv,
                   decorate_ew=decorate_ew)
def flop_timinig_start(self):
"""
Start a new FLOP timer.
#.........這裏部分代碼省略.........
示例2: in
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
for K, C, N in ((3072,3072*1,32),(3072,3072*1,64),(3072,3072*1,96),(3072,3072*1,128),
(3072,3072*2,32),(3072,3072*2,64),(3072,3072*2,96),(3072,3072*2,128),
(3072,3072*3,32),(3072,3072*3,64),(3072,3072*3,96),(3072,3072*3,128),
(3072,3072*4,32),(3072,3072*4,64),(3072,3072*4,96),(3072,3072*4,128),):
#(3072,3072,32+128*0),(3072,3072,64+128*0),(3072,3072,96+128*0),(3072,3072,128+128*0),
#(3072,3072,32+128*1),(3072,3072,64+128*1),(3072,3072,96+128*1),(3072,3072,128+128*1),
#(3072,3072,32+128*2),(3072,3072,64+128*2),(3072,3072,96+128*2),(3072,3072,128+128*2),
#(3072,3072,32+128*3),(3072,3072,64+128*3),(3072,3072,96+128*3),(3072,3072,128+128*3),):
for op, dimA, dimB, dimC in (
("nn", (K,C), (C,N), (K,N) ), # fprop
("tn", (K,C), (K,N), (C,N) ), # bprop
("nt", (K,N), (C,N), (K,C) )): # update
repeat = 5000 if C <= 3072 else 500
devA1 = ng.empty(dimA, dtype=dtype)
devB1 = ng.empty(dimB, dtype=dtype)
devC1 = ng.empty(dimC, dtype=dtype)
# fill with uniform randoms from -1 to 1
devA1[:] = 2 * (.5 - ng.rand())
devB1[:] = 2 * (.5 - ng.rand())
# just alias if same dtype
if dtype is np.float32:
devA2 = devA1
devB2 = devB1
# otherwise copy
else:
devA2 = ng.empty(dimA, dtype=np.float32)
devB2 = ng.empty(dimB, dtype=np.float32)
示例3: slicable
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
cpuI = np.random.uniform(0.0, 9.4, slicable(dimI,1)).astype(np.float16).astype(np.float32)
# zero pad the last row of cpu input for the sake of numpy
if pool.op == "max":
cpuI[-1,:] = np.finfo(cpuI.dtype).min
else:
cpuI[-1,:] = 0
# cpu output arrays
cpuO = np.empty(dimO, dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)
# give gpu the input array without zero padding (not needed)
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devO = ng.zeros(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)
ng.fprop_pool(pool, devI, devO, repeat=repeat)
ng.bprop_pool(pool, devI, devO, devB, repeat=repeat)
def pixel_indices(kj, mt, pr, qs):
C = pool.C
J,T,R,S = pool.JTRS
D,H,W = pool.DHW
HW = H*W
DHW = D*H*W
imax = C*D*H*W
idx = []
示例4: bprop
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
# bprop(nn): NK x KC = NC
# updat(tn): NK^T x NC = KC
repeat = 2000
for K, C, N in ((3072,3072,32),):
total = 0
for op, dimA, dimB, dimC in (
("nn", (K,C), (C,N), (K,N) ), # fprop
("tn", (K,C), (K,N), (C,N) ), # bprop
("nt", (K,N), (C,N), (K,C) ),): # update
devA = ng.empty(dimA, dtype=np.float32)
devB = ng.empty(dimB, dtype=np.float32)
devC = ng.empty(dimC, dtype=np.float32)
# fill with uniform randoms from -1 to 1
devA[:] = 2 * (.5 - ng.rand())
devB[:] = 2 * (.5 - ng.rand())
total += cublas_dot(op, devA, devB, devC, repeat=repeat, warmup=True)
print "N2 Total: ", total
total = 0
for op, dimA, dimB, dimC in (
("nt", (N,C), (K,C), (N,K) ), # fprop
("nn", (N,K), (K,C), (N,C) ), # bprop
示例5: else
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
dimA = (m,k) if op[0] == 'n' else (k,m)
dimB = (k,n) if op[1] == 'n' else (n,k)
dimC = (m,n)
if data_type == "All Ones":
cpuA = np.ones(dimA, dtype=dtype).astype(np.float32)
cpuB = np.ones(dimB, dtype=dtype).astype(np.float32)
#cpuB = np.identity(n, dtype=np.float32)
else:
cpuA = np.random.uniform(-1.0, 1.0, dimA).astype(np.float32)
cpuB = np.random.uniform(-1.0, 1.0, dimB).astype(np.float32)
devA = ng.array(cpuA, dtype=dtype)
devB = ng.array(cpuB, dtype=dtype)
devC = ng.empty(dimC, dtype=dtype)
if op[0] == 't': cpuA, devA = cpuA.T, devA.T
if op[1] == 't': cpuB, devB = cpuB.T, devB.T
ng.dot(devA, devB, devC, repeat=repeat)
if cpu:
cpuC = np.dot(cpuA, cpuB)
cpuD = devC.get()
diff = np.absolute(cpuC - cpuD)
print diff.max()
print cpuD[::max(m//4,1),::max(n//4,1)]
示例6: type
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
inception = True
# find the size of the largest buffers so they can be shared
if layer.sizeF > max_weights:
max_weights = layer.sizeF
max_weight_layer = layer
if layer.sizeI > max_deltas and type(prev_layer) is not DataLayer:
max_deltas = layer.sizeI
max_delta_layer = layer
prev_layer = layer
layers.append(layer)
# Init shared buffers (assumes consistent dtype for now)
shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
if inception:
shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
shared_updates = ng.empty(max_weight_layer.dimF, dtype=np.float32)
for i, layer in enumerate(layers):
print(layer)
# Intitalize buffers. Alernate shared delta buffer.
# One layer can't have the same buffer for both error in and error out.
layer.init_activations()
layer.init_weights(shared=shared_updates, zeros=zeros)
if i > 1:
示例7:
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
N,C,K = conv.NCK
D,H,W = conv.DHW
T,R,S = conv.TRS
M,P,Q = conv.MPQ
pad_d, pad_h, pad_w = conv.padding
str_d, str_h, str_w = conv.strides
alpha, beta = (1.0, 0.0)
dimI = conv.dimI2
dimF = conv.dimF2
dimO = conv.dimO2
print "cudnn:"
cuI = ng.empty(dimI[::-1], dtype=np.float32)
cuF = ng.empty(dimF[::-1], dtype=np.float32)
cuE = ng.empty(dimO[::-1], dtype=np.float32)
cuB = ng.empty(dimI[::-1], dtype=np.float32)
cuU = ng.empty(dimF[::-1], dtype=np.float32)
cuO = ng.empty(dimO[::-1], dtype=np.float32)
cuI[:] = 2 * (.5 - ng.rand())
cuF[:] = 2 * (.5 - ng.rand())
cuE[:] = 2 * (.5 - ng.rand())
#print drv.mem_get_info()
I_data = ctypes.c_void_p(int(cuI.gpudata))
F_data = ctypes.c_void_p(int(cuF.gpudata))
O_data = ctypes.c_void_p(int(cuO.gpudata))
E_data = ctypes.c_void_p(int(cuE.gpudata))
示例8: sorted
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
layers.append(layer)
# find the size of the largest buffers so they can be shared
if layer.sizeF > max_weights:
max_weights = layer.sizeF
max_weight_layer = layer
if layer.sizeO > max_deltas:
max_deltas = layer.sizeO
max_delta_layer = layer
# for layer in sorted(layers, key=lambda l: l.sizeO, reverse=True):
# print("%d %s" % (layer.sizeO, layer))
# Init shared buffers (assumes consistent dtype for now)
shared_deltas[0] = ng.empty(max_delta_layer.dimO2, dtype=max_delta_layer.dtype)
shared_deltas[1] = ng.empty(max_delta_layer.dimO2, dtype=max_delta_layer.dtype)
shared_weights = ng.empty(max_weight_layer.dimF2, dtype=max_weight_layer.dtype)
prev_layer = None
delta = False
for layer in layers:
print(layer)
# Intitalize buffers. Alernate shared delta buffer.
# One layer can't have the same buffer for both error in and error out.
layer.init_activations()
layer.init_weights(shared=shared_weights)
layer.init_deltas(shared=shared_deltas[delta])
示例9:
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
dimO = (X,N,K)
if ones:
cpuI = np.ones(dimI, dtype=np.float32)
cpuE = np.ones(dimO, dtype=np.float32)
cpuW = np.ones(dimW, dtype=np.float32)
else:
cpuI = np.random.uniform(-1.0, 1.0, dimI).astype(dtype).astype(np.float32)
cpuE = np.random.uniform(-1.0, 1.0, dimO).astype(dtype).astype(np.float32)
cpuW = np.random.uniform(-1.0, 1.0, dimW).astype(dtype).astype(np.float32)
devI = ng.array(cpuI, dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)
devW = ng.array(cpuW, dtype=dtype)
devO = ng.empty(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)
devU = ng.empty(dimW, dtype=dtype)
if Nin:
ng.batched_dot(devW, devI, devO, repeat=repeat, size=size) # fprop
ng.batched_dot(devW.T, devE, devB, repeat=repeat, size=size) # bprop
ng.batched_dot(devE, devI.T, devU, repeat=repeat, size=size) # update
else:
ng.batched_dot(devI, devW.T, devO, repeat=repeat, size=size) # fprop
ng.batched_dot(devE, devW, devB, repeat=repeat, size=size) # bprop
ng.batched_dot(devE.T, devI, devU, repeat=repeat, size=size) # update
if cpu:
cpuO = np.empty(dimO, dtype=np.float32)
示例10: padding
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import empty [as 別名]
cpuI[-1,:] = 0.0
# cpu output arrays
cpuO = np.zeros(dimO, dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)
cpuU = np.zeros(slicable(dimF), dtype=np.float32)
# give gpu the input array without zero padding (not needed)
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devF = ng.array(cpuF.reshape(dimF), dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)
devO = devB = devU = 0
if "fprop" in ops:
devO = ng.empty(dimO, dtype=dtype)
ng.fprop_conv(conv, devI, devF, devO, alpha=1.0, repeat=repeat)
if "bprop" in ops:
devB = ng.empty(dimI, dtype=dtype)
ng.bprop_conv(conv, devF, devE, devB, alpha=1.0, repeat=repeat)
if "update" in ops:
devU = ng.empty(dimF, dtype=dtype)
ng.update_conv(conv, devI, devE, devU, alpha=1.0, repeat=repeat)
def pixel_indices(mt, pr, qs):
T,R,S = conv.TRS
D,H,W = conv.DHW