本文整理匯總了 Python 中 nervanagpu.NervanaGPU 類的典型用法代碼示例。如果您正苦於以下問題:Python NervanaGPU 類的具體用法?Python NervanaGPU 怎麼用?Python NervanaGPU 使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了 NervanaGPU 類的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: __init__
def __init__(self, rng_seed, stochastic_round=False, device_id=0):
    """Create the NervanaGPU handle and initialise the backend RNG.

    Arguments:
        rng_seed: stored as ``self.rng_seed`` before ``rng_init()`` runs.
        stochastic_round: forwarded unchanged to ``NervanaGPU``.
        device_id: recorded as ``self.device_id``; ``None`` is stored as 0.
    """
    self.ng = NervanaGPU(stochastic_round=stochastic_round)
    logger.info("Initialized NervanaGPU with stochastic_round=%s",
                stochastic_round)

    self.rng_seed = rng_seed
    self.rng_init()

    # The device id is only recorded here; it is assigned after
    # rng_init(), exactly as in the original statement order.
    if device_id is None:
        self.device_id = 0
    else:
        self.device_id = device_id
示例2: __init__
def __init__(self, rng_seed, stochastic_round=False, device_id=0):
    """Create a CUDA context on one device, then the NervanaGPU handle.

    The context is stored in the module-level ``ctx`` and its ``pop`` is
    registered with ``atexit``.

    Arguments:
        rng_seed: stored as ``self.rng_seed`` before ``rng_init()`` runs.
        stochastic_round: forwarded unchanged to ``NervanaGPU``.
        device_id: index handed to ``drv.Device``; ``None`` means device 0.
    """
    global ctx

    import atexit
    import pycuda.driver as drv

    # Resolve the None fallback *before* talking to the driver.  The
    # fallback used to be applied only when storing self.device_id, so
    # a None device_id was passed straight to drv.Device().
    if device_id is None:
        device_id = 0

    drv.init()
    ctx = drv.Device(device_id).make_context()
    atexit.register(ctx.pop)

    self.ng = NervanaGPU(stochastic_round=stochastic_round)
    logger.info("Initialized NervanaGPU with stochastic_round=%s",
                stochastic_round)
    self.rng_seed = rng_seed
    self.rng_init()
    self.device_id = device_id
示例3: NervanaGPU
import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from operator import mul
# Show the name of the CUDA device behind the pycuda.autoinit context.
print context.get_device().name()
# Array printing: ints padded to width 10, floats with three decimals.
np.set_printoptions(threshold=8193, linewidth=600, formatter={'int':lambda x: "%10d" % x,'float':lambda x: "% .3f" % x})
# Script-level settings. The code that reads dtype / cpu / repeat lies
# outside this excerpt (presumably element type, CPU cross-check flag
# and iteration count -- TODO confirm).
dtype = np.float16
cpu = 1
repeat = 1
ng = NervanaGPU(stochastic_round=False, bench=True)
# "max" pooling layer; the meaning of each positional group is noted inline.
pool = ng.pool_layer(
"max",
64, # N
64,1,64,64, # C,D,H,W
4,1,2,2, # J,T,R,S
0,0,0,0, # padding
4,1,2,2) # strides
# Keep the layer's dimI / dimO attributes under shorter names.
dimI = pool.dimI
dimO = pool.dimO
# Collapse the pooling dimensions into one;
# this allows for easy CPU pooling in numpy.
def slicable(dim, pad=0):
示例4: range
# Swap A and B to map from C order to Fortran
for r in range(repeat):
cublas.cublasSgemm(handle, opB, opA, n, m, k, 1.0, B.gpudata, ldb, A.gpudata, lda, 0.0, C.gpudata, ldc)
end.record()
end.synchronize()
msecs = end.time_since(start) / repeat
gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
print "%7.3f msecs %4.0f gflops (%s_%s : %d,%d,%d)" % (msecs,gflops,"cublas",op,m,n,k)
return gflops
np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})
ng = NervanaGPU(stochastic_round=False, bench=True)
for dtype in (np.float16,np.float32):
for K, C, N in ((3072,3072*1,32),(3072,3072*1,64),(3072,3072*1,96),(3072,3072*1,128),
(3072,3072*2,32),(3072,3072*2,64),(3072,3072*2,96),(3072,3072*2,128),
(3072,3072*3,32),(3072,3072*3,64),(3072,3072*3,96),(3072,3072*3,128),
(3072,3072*4,32),(3072,3072*4,64),(3072,3072*4,96),(3072,3072*4,128),):
#(3072,3072,32+128*0),(3072,3072,64+128*0),(3072,3072,96+128*0),(3072,3072,128+128*0),
#(3072,3072,32+128*1),(3072,3072,64+128*1),(3072,3072,96+128*1),(3072,3072,128+128*1),
#(3072,3072,32+128*2),(3072,3072,64+128*2),(3072,3072,96+128*2),(3072,3072,128+128*2),
#(3072,3072,32+128*3),(3072,3072,64+128*3),(3072,3072,96+128*3),(3072,3072,128+128*3),):
for op, dimA, dimB, dimC in (
("nn", (K,C), (C,N), (K,N) ), # fprop
("tn", (K,C), (K,N), (C,N) ), # bprop
("nt", (K,N), (C,N), (K,C) )): # update
示例5: NervanaGPU
import numpy as np
import pycuda.autoinit
from nervanagpu import NervanaGPU
# Square a random 200x200 matrix through NervanaGPU and check that the
# values read back contain no NaN.
nrv = NervanaGPU(default_dtype=np.float32)

a = nrv.array(np.random.randn(200, 200))
b = nrv.empty_like(a)
b[:] = a ** 2

has_nan = np.any(np.isnan(b.get()))
assert not has_nan, "Shouldn't have any nan's here"
示例6: range
# Swap A and B to map from C order to Fortran
for r in range(repeat):
cublas.cublasSgemm(handle, opB, opA, n, m, k, alpha, B.gpudata, ldb, A.gpudata, lda, beta, C.gpudata, ldc)
if repeat > 1:
end.record()
end.synchronize()
msecs = end.time_since(start) / repeat
gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
print("%7.3f msecs %4.0f gflops (%s_%s : %d,%d,%d)" %
(msecs,gflops,"cublas",op,m,n,k))
# Print floats with no decimal places.
np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})
ng = NervanaGPU(stochastic_round=0, bench=0)
# Groups of integer sizes. The loops that consume them are not visible
# in this excerpt (presumably matrix dimensions to sweep -- TODO confirm).
# Several entries sit just below / at / just above 128, 786, 1536 and 4096.
small_1 = (1,2,3,4,5,6,7,8,9,16,32,64,65,72,120,127,128,192)
medium_1 = (32,64,128,192,778,785,786,787,794)
big_1 = (32,64,128,1532,1535,1536,1537,1540,3073,4095)
small_2 = (8,16,32,64,72,96,120,128,192)
medium_2 = (32,64,128,192,256,786-32,786-16,786,786+16,786+32)
big_2 = (32,64,128,1536-80,1536-64,1536,1536+64,1536+80,3072,4096)
# sharedDim = (4096,4096)
# devA1s = ng.empty(sharedDim, dtype=np.float32)
# devB1s = ng.empty(sharedDim, dtype=np.float32)
# devC1s = ng.empty(sharedDim, dtype=np.float32)
# devA2s = ng.empty(sharedDim, dtype=np.float32)
# devB2s = ng.empty(sharedDim, dtype=np.float32)
示例7: NervanaGPU
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
# Show the name of the CUDA device behind the pycuda.autoinit context.
print context.get_device().name()
# Print floats with no decimal places.
np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})
ng = NervanaGPU(stochastic_round=False, bench=True)
# Settings read by the loops that follow.
dtype = np.float16
repeat = 1
cpu = 1 # Set CPU to 1 to check against CPU
for data_type in ("All Ones", "Random Data",): #"All Ones", "Random Data"
print data_type
for size in ((3072,3072,3072*2),): #(4095,4095,4095)
m, n, k = size
for op in ("tn","nn","nt"): #"tn","nn","nt"
dimA = (m,k) if op[0] == 'n' else (k,m)
dimB = (k,n) if op[1] == 'n' else (n,k)
dimC = (m,n)
示例8: range
# Swap A and B to map from C order to Fortran
for r in range(repeat):
cublas.cublasSgemm(handle, opB, opA, n, m, k, alpha, B.gpudata, ldb, A.gpudata, lda, beta, C.gpudata, ldc)
end.record()
end.synchronize()
msecs = end.time_since(start) / repeat
gflops = (m * n * k * 2.0) / (msecs * 1000000.0)
print("%7.3f msecs %4.0f gflops (%s_%s : %d,%d,%d)" %
(msecs,gflops,"cublas",op,m,n,k))
np.set_printoptions(threshold=8193, linewidth=600, formatter={'float':lambda x: "% .0f" % x})
ng = NervanaGPU(stochastic_round=False, bench=True)
repeat = 1
for dtype in (np.float16, np.float32,):
for K, C, N in ((32,4096,1512),):
for alpha, beta in ((1.0,0.0), (0.5,0.5)):
for op, dimA, dimB, dimC in (
("nn", (K,C), (C,N), (K,N) ), # fprop
("tn", (K,C), (K,N), (C,N) ), # bprop
("nt", (K,N), (C,N), (K,C) ),): # update
devA1 = ng.empty(dimA, dtype=dtype)
示例9: __init__
def __init__(self, rng_seed, stochastic_round=False, device_id=0,
             num_dev=2):
    """Create one CUDA context per device plus a shared NervanaGPU handle.

    For every device a context, two streams and two events are created.
    The first context is left pushed (and popped at interpreter exit),
    and peer access is enabled between every pair of devices that
    supports it.

    Arguments:
        rng_seed: stored as ``self.rng_seed`` before ``rng_init()`` runs.
        stochastic_round: forwarded unchanged to ``NervanaGPU``.
        device_id: 0 selects devices ``range(num_dev)``; any other value
            is used directly as the device list, so it must support
            ``len()`` and iteration.
        num_dev: number of devices; must equal the length of the device
            list and must not exceed ``drv.Device.count()``.

    Raises:
        AssertionError: if the device list length differs from
            ``num_dev`` or more devices are requested than exist.
    """
    drv.init()
    self.num_dev = num_dev
    if device_id == 0:
        self.dev_list = range(num_dev)
    else:
        self.dev_list = device_id
    assert len(self.dev_list) == self.num_dev
    assert self.num_dev <= drv.Device.count()

    self.ctxs = []
    self.devs = []
    self._strms = []
    self._redstrms = []
    self._events = []
    self._redevents = []
    # ``async`` is a reserved word from Python 3.7 on, so the attribute
    # is assigned through setattr; it is still stored under "async".
    setattr(self, "async", True)
    self._nostrms = [None for i in self.dev_list]

    for i in self.dev_list:
        self.devs.append(drv.Device(i))

    for dev in self.devs:
        self.ctxs.append(
            dev.make_context(drv.ctx_flags.SCHED_BLOCKING_SYNC))
        self._strms.append(drv.Stream())
        self._redstrms.append(drv.Stream())
        self._events.append(drv.Event())
        self._redevents.append(drv.Event())
        # Pop the context that was just made so that none is left
        # pushed once the loop is done.
        drv.Context.pop()

    self.ctxs[0].push()
    atexit.register(drv.Context.pop)
    MGPUTensor.ctxs = self.ctxs
    MGPUTensor.num_dev = num_dev

    self.ng = NervanaGPU(stochastic_round=stochastic_round)
    logger.info("Initialized %d device NervanaGPU, stochastic_round=%s",
                num_dev, stochastic_round)
    self.ng.block = None
    self.rng_seed = rng_seed
    self.rng_init()

    # Setup the pairwise contexts
    # TODO clean up this code to avoid indexing
    # The device ids are carried along so that the failure message can
    # format integers; formatting the Device objects themselves with
    # '{:d}' raised instead of printing the message.
    for id1, dev1, ctx1 in zip(self.dev_list, self.devs, self.ctxs):
        ctx1.push()
        for id2, dev2, ctx2 in zip(self.dev_list, self.devs, self.ctxs):
            if dev1 == dev2:
                continue
            if dev1.can_access_peer(dev2):
                ctx1.enable_peer_access(ctx2)
            else:
                print('Cannot enable peer access between '
                      '{:d} and {:d}'.format(id1, id2))
        ctx1.pop()
示例10: NervanaGPU
# Benchmark configuration: which networks and dtypes to run, reporting
# switches, and reusable layer-setting dictionaries.
# Note GoogLeNet2 only fits in fp16 currently. I need to work out delta sharing in inception layers.
nets = ("Alexnet","Overfeat","GoogLeNet1","GoogLeNet2","VGG","VGG_E",)
# Available dtypes: np.float16, np.float32
dtypes = (np.float16,np.float32)
# number of full iterations
loops = 10
# show benchmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0
# run network with all zeros to see speed difference
zeros = 0
ng = NervanaGPU(bench=layer_bench)
# common convolutional layer settings
# NOTE(review): keys look like filter size (R, S), padding (pad_h, pad_w)
# and stride (str_h, str_w); the consuming layer code is not visible
# here -- confirm.
conv11 = { "R":11, "S":11, "pad_h":2, "pad_w":2, "str_h":4, "str_w":4 }
conv11p0 = { "R":11, "S":11, "pad_h":0, "pad_w":0, "str_h":4, "str_w":4 }
conv7 = { "R":7, "S":7, "pad_h":3, "pad_w":3, "str_h":2, "str_w":2 }
conv5 = { "R":5, "S":5, "pad_h":2, "pad_w":2 }
conv5p0 = { "R":5, "S":5, "pad_h":0, "pad_w":0 }
conv3 = { "R":3, "S":3, "pad_h":1, "pad_w":1 }
conv2 = { "R":2, "S":2, "pad_h":0, "pad_w":0, "str_h":2, "str_w":2 }
conv1 = { "R":1, "S":1, "pad_h":0, "pad_w":0 }
# traditional pooling
pool2s2p0 = { "R":2, "S":2 }
pool3s2p0 = { "R":3, "S":3, "str_h":2, "str_w":2 }
pool3s2p1 = { "R":3, "S":3, "str_h":2, "str_w":2, "pad_h":1, "pad_w":1 }
示例11: start_bench
dtype = np.float16
repeat = 20
start, end = (drv.Event(), drv.Event())
def start_bench():
    """Record the module-level ``start`` event to open a timed section."""
    start.record()
def end_bench(op):
    """Close the timed section and print time and throughput for *op*.

    Reads the module-level ``start`` / ``end`` events, ``repeat`` and
    ``conv``.  The elapsed time between the two events is divided by
    ``repeat`` before it is reported.
    """
    end.record()
    end.synchronize()

    per_iter = end.time_since(start) / repeat
    rate = conv.flops / (per_iter * 1000000.0)

    # A single pre-formatted string prints identically whether print is
    # a statement or a function.
    report = "%7.3f msecs %8.3f gflops (%s: %s)" % (per_iter, rate, op, conv)
    print(report)
ng = NervanaGPU(stochastic_round=False, bench=True)
# Create a cuDNN context
cudnn = libcudnn.cudnnCreate()
# One convolution descriptor, four tensor descriptors and two filter
# descriptors. Their roles are not visible in this excerpt (the prefixes
# presumably stand for input / output / error / backprop and
# filter / update -- TODO confirm).
C_desc = libcudnn.cudnnCreateConvolutionDescriptor()
I_desc = libcudnn.cudnnCreateTensorDescriptor()
O_desc = libcudnn.cudnnCreateTensorDescriptor()
E_desc = libcudnn.cudnnCreateTensorDescriptor()
B_desc = libcudnn.cudnnCreateTensorDescriptor()
F_desc = libcudnn.cudnnCreateFilterDescriptor()
U_desc = libcudnn.cudnnCreateFilterDescriptor()
# Set some options and tensor dimensions
# Look up the NCHW tensor-format and FLOAT data-type constants by name.
NCHW_fmt = libcudnn.cudnnTensorFormat['CUDNN_TENSOR_NCHW']
cu_dtype = libcudnn.cudnnDataType['CUDNN_DATA_FLOAT']
示例12: print
from pycuda.autoinit import context
from nervanagpu import NervanaGPU
from nervanagpu.layers import DataLayer, ConvLayer, PoolLayer, FullLayer
# Show the name of the CUDA device behind the pycuda.autoinit context.
print(context.get_device().name())
# Compare results here:
# https://github.com/soumith/convnet-benchmarks
# number of full iterations
loops = 10
# show benchmark details for each layer
layer_bench = 0
# show layer stats after each operation
print_stats = 0
ng = NervanaGPU(bench=layer_bench)
# don't learn, just benchmark
momentum = 0.0
learning_rate = 0.0
# common convolutional layer settings
conv3 = { "R":3, "S":3, "pad_h":1, "pad_w":1 }
conv1 = { "R":1, "S":1, "pad_h":0, "pad_w":0 }
# traditional pooling
pool2 = { "op":"max", "R":2, "S":2 }
pool3 = { "op":"max", "R":3, "S":3, "str_h":2, "str_w":2 }
# maxout pooling
pool1j2 = { "op":"max", "J":2 } # maxout in the fc layers
示例13: NervanaGPU
#!/usr/bin/python
import numpy as np
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from ipdb import set_trace
# Array printing: ints at width 2, floats at width 2 with no decimals.
np.set_printoptions(
    threshold=8192 * 4,
    linewidth=600,
    formatter={
        'int': lambda v: "%2d" % v,
        'float': lambda v: "%2.0f" % v,
    },
)

ng = NervanaGPU(stochastic_round=0, bench=1)

dtype = np.float32  # np.float16 or np.float32
repeat = 50         # repeat count for benchmarking
ones = 0            # simpler data for debugging
cpu = 0             # validate against numpy
size = 32           # 32, 64, 128, None=auto

X = 100   # Batch Size
N = 32    # Minibatch Size
C = 3072  # Input Features
K = 3072  # Output Features

# Nin selects whether N is the last axis (True) or the middle axis
# (False) of the input and output shapes.
Nin = True

dimW = (K, C)
dimI = (X, C, N) if Nin else (X, N, C)
dimO = (X, K, N) if Nin else (X, N, K)
示例14: run
def run():
    """Run one convolution forward pass through two kernels and compare.

    The same random images and filters are pushed through
    ``ng.fprop_cuda_conv`` (4-D images / 3-D filters) and through
    ``ng.fprop_conv`` (the same data reshaped to 2-D).  Input and output
    sums are printed for both, and the two outputs must agree
    element-wise to within 1e-4.

    Raises:
        AssertionError: if any pair of output elements differs by
            1e-4 or more.
    """
    ng = NervanaGPU(stochastic_round=False)
    dt = np.float32

    # N: Number of images in mini-batch
    # C: Number of input feature maps
    # K: Number of output feature maps
    # D: Depth of input image
    # H: Height of input image
    # W: Width of input image
    # T: Depth of filter kernel
    # R: Height of filter kernel
    # S: Width of filter kernel
    #
    # * images: (numColors, imgSizeY, imgSizeX, numImages) with stride given
    # * filters: (numColors, filterPixels, numFilters) if conv
    # *          (numModules, numColors, filterPixels, numFilters) otherwise
    # *
    # * targets: (numFilters, numModulesY, numModulesX, numImages)
    N = 128
    C = 3
    K = 64
    D = 1
    H = 64
    W = 64
    T = 1
    R = 8
    S = 8
    pad_h = pad_w = 0
    str_h = str_w = 4

    layer = ng.conv_layer(dt, N, C, K,
                          D=D, H=H, W=W,
                          T=T, R=R, S=S,
                          pad_d=0, pad_h=pad_h, pad_w=pad_w,
                          str_d=1, str_h=str_h, str_w=str_w,
                          grid_P=0, grid_Q=0, update_size=None)

    numFilters = K
    numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
    numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))
    # Each message is built as one string so that the output is the
    # same whether print is a statement or a function.
    print("Num Modules  %s %s" % (numModulesX, numModulesY))

    # Set up images, filters, and outputs
    # imgd = np.loadtxt("im1.txt")
    # img = np.zeros((64, 64, 3))
    # print imgd.shape
    # for i in range(3):
    #     img[:, :, i] = imgd[i*64:(i+1)*64, :]
    # hostImages = np.tile(img)
    hostImages = np.random.rand(C, H, W, N)
    # alternative filters: np.ones((C, S*R, numFilters))
    hostFilters = np.random.uniform(low=0.0, high=1.0,
                                    size=(C, S*R, numFilters))
    hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))
    print("Input sum %s" % np.sum(hostImages))

    # Run cc2 kernel
    devI = ng.array(hostImages, dtype=dt)
    devF = ng.array(hostFilters, dtype=dt)
    devO = ng.array(hostOutputs, dtype=dt)
    ng.fprop_cuda_conv(layer, devI, devF, devO)
    print("CC2 input sum:  %s" % np.sum(devI.asnumpyarray()))
    print("CC2 output sum:  %s" % np.sum(devO.asnumpyarray()))

    # Run maxwell kernel
    # images: (C * H * W, N)
    # filters: (C * S * R , numFilters)
    # outputs: (numFilters * numModulesX * numModulesY, N)
    devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
    devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
    devO2 = ng.array(
        hostOutputs.reshape(numFilters*numModulesX*numModulesY, N),
        dtype=dt)
    ng.fprop_conv(layer, devI, devF, devO2)
    print("NG input sum:  %s" % np.sum(devI.asnumpyarray()))
    print("NG output sum:  %s" % np.sum(devO2.asnumpyarray()))

    # Compare both results in the 2-D layout with one vectorized check
    # instead of a Python loop over every element.
    hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
    hostOutputs2 = devO2.asnumpyarray()
    assert np.all(np.abs(hostOutputs1 - hostOutputs2) < 1e-4)
示例15: set
import pycuda.driver as drv
from nervanagpu import NervanaGPU
from pycuda.autoinit import context
from operator import mul
# Show the name of the CUDA device behind the pycuda.autoinit context.
print context.get_device().name()
# Array printing: ints padded to width 10, floats with no decimals.
np.set_printoptions(threshold=8193, linewidth=600, formatter={'int':lambda x: "%10d" % x,'float':lambda x: "% .0f" % x})
# Set of operation names to run; the inline comment lists the accepted names.
ops = set(("update",)) # "fprop","bprop","update"
ones = 0
cpu = 0 # Set CPU to 1 to check against CPU
repeat = 1
dtype = np.float32
ng = NervanaGPU(stochastic_round=False, bench=True)
# Convolution layer; the meaning of each positional group is noted inline.
conv = ng.conv_layer(
dtype,
16,3,8, # N,C,K
1,64,64, # D,H,W
1,3,3, # T,R,S
0,1,1, # padding
1,1,1) # strides
# Keep the layer's dimI / dimF / dimO attributes under shorter names.
dimI = conv.dimI
dimF = conv.dimF
dimO = conv.dimO
# collapse outer dimensions into one and preserve inner dimension