本文整理匯總了Python中nervanagpu.NervanaGPU.array方法的典型用法代碼示例。如果您正苦於以下問題：Python NervanaGPU.array方法的具體用法？Python NervanaGPU.array怎麼用？Python NervanaGPU.array使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類nervanagpu.NervanaGPU的用法示例。
在下文中一共展示了NervanaGPU.array方法的8個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: slicable
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
# cpu input arrays
# The float16 round-trip leaves float32 host data that only holds values
# representable in half precision.
cpuI = np.random.uniform(0.0, 9.4, slicable(dimI,1)).astype(np.float16).astype(np.float32)
# zero pad the last row of cpu input for the sake of numpy
# For max pooling the extra row is filled with the most negative finite value
# of the dtype instead of zero (presumably so the padding can never be
# selected as a maximum -- confirm against the cpu reference code).
if pool.op == "max":
    cpuI[-1,:] = np.finfo(cpuI.dtype).min
else:
    cpuI[-1,:] = 0
# cpu output arrays
cpuO = np.empty(dimO, dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)
# give gpu the input array without zero padding (not needed)
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devO = ng.zeros(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)
# Forward pooling writes devO; the backward pass then writes devB.
# NOTE(review): devO is passed as the third bprop_pool argument, presumably
# standing in for the incoming error tensor -- confirm.
ng.fprop_pool(pool, devI, devO, repeat=repeat)
ng.bprop_pool(pool, devI, devO, devB, repeat=repeat)
def pixel_indices(kj, mt, pr, qs):
C = pool.C
J,T,R,S = pool.JTRS
D,H,W = pool.DHW
HW = H*W
DHW = D*H*W
imax = C*D*H*W
示例2: NervanaGPU
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
import numpy as np
import pycuda.autoinit  # imported for its side effects only (presumably CUDA context setup -- confirm)
from nervanagpu import NervanaGPU

# Backend handle whose default dtype is float32.
backend = NervanaGPU(default_dtype=np.float32)

# Upload a 200x200 block of standard-normal samples.
host_samples = np.random.randn(200, 200)
dev_src = backend.array(host_samples)

# Square element-wise into a second tensor of the same layout.
dev_squared = backend.empty_like(dev_src)
dev_squared[:] = dev_src ** 2

# Copy the result back to the host and make sure no NaN was produced.
host_squared = dev_squared.get()
assert not np.any(np.isnan(host_squared)), "Shouldn't have any nan's here"
示例3: GPU
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
#.........這裏部分代碼省略.........
self.flop_timer.decorate(decorate_fc=decorate_fc,
decorate_conv=decorate_conv,
decorate_ew=decorate_ew)
def flop_timinig_start(self):
    """
    Start a new FLOP timer by recording the ``start`` event.

    The method name (including its spelling) is kept unchanged so existing
    callers keep working.

    Returns:
        Whatever ``self.start.record()`` returns. ``flop_timing_finish``
        does not use this value; it reads ``self.start`` directly.
    """
    return self.start.record()
def flop_timing_finish(self, start_time):
    """
    Complete the current FLOP timing.

    Arguments:
        start_time (unused): ignored; the interval is always measured from
                             ``self.start``.

    Returns:
        The value of ``self.end.time_since(self.start)``, i.e. the time
        elapsed since the prior ``flop_timinig_start`` call.
        NOTE(review): this docstring used to say "seconds"; if ``start`` and
        ``end`` are pycuda events, ``time_since`` reports milliseconds --
        confirm the unit expected by callers.
    """
    self.end.record()
    # Synchronize on the end event before asking for the elapsed interval.
    self.end.synchronize()
    return self.end.time_since(self.start)
def uniform(self, low=0.0, high=1.0, shape=1, dtype=default_dtype,
            name=None, allocator=drv.mem_alloc):
    """
    Generate uniform random numbers with numpy on the host and convert them
    to a tensor via ``self.ng.array``.

    Arguments:
        low (float): lower bound of the sampling interval (inclusive).
        high (float): upper bound of the sampling interval (exclusive).
        shape (int or tuple of ints): shape of the sample array.
        dtype: element type forwarded to ``self.ng.array``. If called with
               dtype=None it will probably explode.
        name (str): optional name forwarded to ``self.ng.array``.
        allocator: accepted for interface compatibility; not used here.

    Returns:
        The object returned by ``self.ng.array`` (presumably a GPUTensor).
    """
    ary = np.random.uniform(low, high, shape)
    # Forward by keyword so the call does not depend on the positional
    # order of the optional parameters of ``array``.
    return self.ng.array(ary, dtype=dtype, name=name)
def normal(self, loc=0.0, scale=1.0, size=1, dtype=default_dtype,
           name=None, allocator=drv.mem_alloc):
    """
    Gaussian/Normal random number sample generation.

    Samples are drawn with numpy on the host and converted to a tensor via
    ``self.ng.array``.

    Arguments:
        loc (float): mean of the distribution.
        scale (float): standard deviation of the distribution.
        size (int or tuple of ints): shape of the sample array.
        dtype: element type forwarded to ``self.ng.array``.
        name (str): optional name forwarded to ``self.ng.array``.
        allocator: accepted for interface compatibility; not used here.

    Returns:
        The object returned by ``self.ng.array`` (presumably a GPUTensor).
    """
    ary = np.random.normal(loc, scale, size)
    # Forward by keyword so the call does not depend on the positional
    # order of the optional parameters of ``array``.
    return self.ng.array(ary, dtype=dtype, name=name)
def fprop_fc(self, out, inputs, weights, layer=None):
    """
    Forward propagate the inputs of a fully connected network layer to
    produce output pre-activations (ready for transformation by an
    activation function).

    Arguments:
        out (GPUTensor): Where to store the forward propagated results.
        inputs (GPUTensor): Will be either the dataset input values (first
                            layer), or the outputs from the previous layer.
        weights (GPUTensor): The weight coefficient values for this layer.
        layer (Layer): The layer object. Not used by this implementation.
    """
    # The result is written into ``out``; nothing is returned.
    self.ng.dot(weights, inputs, out)
def bprop_fc(self, out, weights, deltas, layer=None):
"""
Backward propagate the error through a fully connected network layer.
Arguments:
out (GPUTensor): Where to store the backward propagated errors.
weights (GPUTensor): The weight coefficient values for this layer.
deltas (GPUTensor): The error values for this layer
示例4: in
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
m, n, k = size
# Exercise each transpose combination; op[0] describes A and op[1] describes
# B, with 'n' meaning "as stored" and 't' meaning "transposed before the dot".
for op in ("tn","nn","nt"): #"tn","nn","nt"
    # Operands flagged 't' are allocated with swapped dimensions so that the
    # transposed view taken below has the shape the product needs.
    dimA = (m,k) if op[0] == 'n' else (k,m)
    dimB = (k,n) if op[1] == 'n' else (n,k)
    dimC = (m,n)
    if data_type == "All Ones":
        cpuA = np.ones(dimA, dtype=dtype).astype(np.float32)
        cpuB = np.ones(dimB, dtype=dtype).astype(np.float32)
        #cpuB = np.identity(n, dtype=np.float32)
    else:
        cpuA = np.random.uniform(-1.0, 1.0, dimA).astype(np.float32)
        cpuB = np.random.uniform(-1.0, 1.0, dimB).astype(np.float32)
    devA = ng.array(cpuA, dtype=dtype)
    devB = ng.array(cpuB, dtype=dtype)
    devC = ng.empty(dimC, dtype=dtype)
    # Transpose host and device operands together so both products below
    # see the same logical matrices.
    if op[0] == 't': cpuA, devA = cpuA.T, devA.T
    if op[1] == 't': cpuB, devB = cpuB.T, devB.T
    ng.dot(devA, devB, devC, repeat=repeat)
    if cpu:
        # Host reference product and element-wise absolute error against
        # the device result.
        cpuC = np.dot(cpuA, cpuB)
        cpuD = devC.get()
        diff = np.absolute(cpuC - cpuD)
        # NOTE(review): the excerpt ends here; whatever consumes ``diff``
        # is not visible.
示例5: GPU
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
#.........這裏部分代碼省略.........
self.flop_timer.decorate(decorate_fc=decorate_fc,
decorate_conv=decorate_conv,
decorate_ew=decorate_ew)
def flop_timinig_start(self):
    """
    Start a new FLOP timer by recording the ``start`` event.

    The method name (including its spelling) is kept unchanged so existing
    callers keep working.

    Returns:
        Whatever ``self.start.record()`` returns. ``flop_timing_finish``
        does not use this value; it reads ``self.start`` directly.
    """
    return self.start.record()
def flop_timing_finish(self, start_time):
    """
    Complete the current FLOP timing.

    Arguments:
        start_time (unused): ignored; the interval is always measured from
                             ``self.start``.

    Returns:
        The value of ``self.end.time_since(self.start)``, i.e. the time
        elapsed since the prior ``flop_timinig_start`` call.
        NOTE(review): this docstring used to say "seconds"; if ``start`` and
        ``end`` are pycuda events, ``time_since`` reports milliseconds --
        confirm the unit expected by callers.
    """
    self.end.record()
    # Synchronize on the end event before asking for the elapsed interval.
    self.end.synchronize()
    return self.end.time_since(self.start)
def uniform(self, low=0.0, high=1.0, size=1, dtype=default_dtype,
            persist_values=True, name=None):
    """
    Generate uniform random numbers with numpy on the host and convert them
    to a tensor via ``self.ng.array``.

    Arguments:
        low (float): lower bound of the sampling interval (inclusive).
        high (float): upper bound of the sampling interval (exclusive).
        size (int or tuple of ints): shape of the sample array.
        dtype: element type forwarded to ``self.ng.array``. If called with
               dtype=None it will probably explode.
        persist_values (bool): accepted for interface compatibility; not
                               used here.
        name (str): optional name forwarded to ``self.ng.array``.

    Returns:
        The object returned by ``self.ng.array`` (presumably a GPUTensor).
    """
    ary = np.random.uniform(low, high, size)
    return self.ng.array(ary, dtype=dtype, name=name)
def normal(self, loc=0.0, scale=1.0, size=1, dtype=default_dtype,
           persist_values=True, name=None):
    """
    Gaussian/Normal random number sample generation.

    Samples are drawn with numpy on the host and converted to a tensor via
    ``self.ng.array``.

    Arguments:
        loc (float): mean of the distribution.
        scale (float): standard deviation of the distribution.
        size (int or tuple of ints): shape of the sample array.
        dtype: element type forwarded to ``self.ng.array``.
        persist_values (bool): accepted for interface compatibility; not
                               used here.
        name (str): optional name forwarded to ``self.ng.array``.

    Returns:
        The object returned by ``self.ng.array`` (presumably a GPUTensor).
    """
    ary = np.random.normal(loc, scale, size)
    return self.ng.array(ary, dtype=dtype, name=name)
def fprop_fc(self, out, inputs, weights, layer=None):
    """
    Forward propagate the inputs of a fully connected network layer to
    produce output pre-activations (ready for transformation by an
    activation function).

    Arguments:
        out (GPUTensor): Where to store the forward propagated results.
        inputs (GPUTensor): Will be either the dataset input values (first
                            layer), or the outputs from the previous layer.
        weights (GPUTensor): The weight coefficient values for this layer.
        layer (Layer): The layer object. Not used by this implementation.
    """
    # The result is written into ``out``; nothing is returned.
    self.ng.dot(weights, inputs, out)
def bprop_fc(self, out, weights, deltas, layer=None):
"""
Backward propagate the error through a fully connected network layer.
Arguments:
out (GPUTensor): Where to store the backward propagated errors.
weights (GPUTensor): The weight coefficient values for this layer.
deltas (GPUTensor): The error values for this layer
示例6:
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
dimI = (X,C,N)
dimO = (X,K,N)
else:
dimI = (X,N,C)
dimO = (X,N,K)
# Host-side operands: inputs (cpuI), errors (cpuE) and weights (cpuW).
if ones:
    cpuI = np.ones(dimI, dtype=np.float32)
    cpuE = np.ones(dimO, dtype=np.float32)
    cpuW = np.ones(dimW, dtype=np.float32)
else:
    # The round-trip through ``dtype`` leaves float32 host data that only
    # holds values representable in the device element type.
    cpuI = np.random.uniform(-1.0, 1.0, dimI).astype(dtype).astype(np.float32)
    cpuE = np.random.uniform(-1.0, 1.0, dimO).astype(dtype).astype(np.float32)
    cpuW = np.random.uniform(-1.0, 1.0, dimW).astype(dtype).astype(np.float32)
# Device copies of the operands.
devI = ng.array(cpuI, dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)
devW = ng.array(cpuW, dtype=dtype)
# Uninitialised device buffers for the fprop (devO), bprop (devB) and
# update (devU) results.
devO = ng.empty(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)
devU = ng.empty(dimW, dtype=dtype)
# The operand order and the transposes differ between the two layouts
# selected by ``Nin``.
if Nin:
    ng.batched_dot(devW, devI, devO, repeat=repeat, size=size) # fprop
    ng.batched_dot(devW.T, devE, devB, repeat=repeat, size=size) # bprop
    ng.batched_dot(devE, devI.T, devU, repeat=repeat, size=size) # update
else:
    ng.batched_dot(devI, devW.T, devO, repeat=repeat, size=size) # fprop
    ng.batched_dot(devE, devW, devB, repeat=repeat, size=size) # bprop
    ng.batched_dot(devE.T, devI, devU, repeat=repeat, size=size) # update
示例7: run
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
def run():
    """
    Cross-check ``fprop_cuda_conv`` against ``fprop_conv`` on random data.

    Builds a single convolution layer description, runs both forward
    convolution kernels on the same host images and filters, prints the
    input/output sums of each run and finally asserts that the two outputs
    agree element-wise.

    Raises:
        AssertionError: if any pair of corresponding output elements differs
            by 1e-4 or more.
    """
    ng = NervanaGPU(stochastic_round=False)
    dt = np.float32

    # N: Number of images in mini-batch
    # C: Number of input feature maps
    # K: Number of output feature maps
    # D: Depth of input image
    # H: Height of input image
    # W: Width of input image
    # T: Depth of filter kernel
    # R: Height of filter kernel
    # S: Width of filter kernel
    #
    # * images:  (numColors, imgSizeY, imgSizeX, numImages) with stride given
    # * filters: (numColors, filterPixels, numFilters) if conv
    # *          (numModules, numColors, filterPixels, numFilters) otherwise
    # *
    # * targets: (numFilters, numModulesY, numModulesX, numImages)
    N = 128
    C = 3
    K = 64
    D = 1
    H = 64
    W = 64
    T = 1
    R = 8
    S = 8
    pad_h = pad_w = 0
    str_h = str_w = 4

    layer = ng.conv_layer(dt, N, C, K,
                          D=D, H=H, W=W,
                          T=T, R=R, S=S,
                          pad_d=0, pad_h=pad_h, pad_w=pad_w,
                          str_d=1, str_h=str_h, str_w=str_w,
                          grid_P=0, grid_Q=0, update_size=None)

    numFilters = K
    # Number of filter applications along each spatial axis.
    numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
    numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Num Modules  %s %s" % (numModulesX, numModulesY))

    # Set up images, filters, and outputs (images first, then filters, so
    # the order of the random draws is unchanged).
    hostImages = np.random.rand(C, H, W, N)
    hostFilters = np.random.uniform(low=0.0, high=1.0,
                                    size=(C, S*R, numFilters))
    hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))
    print("Input sum %s" % (np.sum(hostImages),))

    # Run cc2 kernel
    devI = ng.array(hostImages, dtype=dt)
    devF = ng.array(hostFilters, dtype=dt)
    devO = ng.array(hostOutputs, dtype=dt)
    ng.fprop_cuda_conv(layer, devI, devF, devO)
    print("CC2 input sum:  %s" % (np.sum(devI.asnumpyarray()),))
    print("CC2 output sum:  %s" % (np.sum(devO.asnumpyarray()),))

    # Run maxwell kernel on the same data flattened to 2-D:
    # images:  (C * H * W, N)
    # filters: (C * S * R, numFilters)
    # outputs: (numFilters * numModulesX * numModulesY, N)
    devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
    devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
    devO2 = ng.array(
        hostOutputs.reshape(numFilters*numModulesX*numModulesY, N), dtype=dt)
    ng.fprop_conv(layer, devI, devF, devO2)
    print("NG input sum:  %s" % (np.sum(devI.asnumpyarray()),))
    print("NG output sum:  %s" % (np.sum(devO2.asnumpyarray()),))

    # Compare both results in the flattened layout. One vectorised check
    # replaces a Python-level loop over every output element.
    hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
    hostOutputs2 = devO2.asnumpyarray()
    assert np.all(np.abs(hostOutputs1 - hostOutputs2) < 1e-4)
示例8: slicable
# 需要導入模塊: from nervanagpu import NervanaGPU [as 別名]
# 或者: from nervanagpu.NervanaGPU import array [as 別名]
else:
cpuI = np.random.uniform(-127.0, 127.0, slicable(dimI,1)).astype(np.float32) #.astype(np.uint8) .astype(np.int8)
cpuF = np.random.uniform(0.0, 1.1, slicable(dimF) ).astype(np.float32)
cpuE = np.random.uniform(-1.01, 1.01, dimO ).astype(np.float32)
# zero pad the last row of cpu input for the sake of numpy
cpuI[-1,:] = 0.0
# cpu output arrays
cpuO = np.zeros(dimO, dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)
cpuU = np.zeros(slicable(dimF), dtype=np.float32)
# give gpu the input array without zero padding (not needed)
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devF = ng.array(cpuF.reshape(dimF), dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)
# Placeholders so all three names are bound even when the corresponding
# operation is not listed in ``ops``.
devO = devB = devU = 0
if "fprop" in ops:
    devO = ng.empty(dimO, dtype=dtype)
    ng.fprop_conv(conv, devI, devF, devO, alpha=1.0, repeat=repeat)
if "bprop" in ops:
    devB = ng.empty(dimI, dtype=dtype)
    ng.bprop_conv(conv, devF, devE, devB, alpha=1.0, repeat=repeat)
if "update" in ops:
    devU = ng.empty(dimF, dtype=dtype)
    # NOTE(review): the excerpt ends here; the call that fills devU is not
    # visible.