本文整理汇总了Python中nervanagpu.NervanaGPU.fprop_cuda_conv方法的典型用法代码示例。如果您正苦于以下问题:Python NervanaGPU.fprop_cuda_conv方法的具体用法?Python NervanaGPU.fprop_cuda_conv怎么用?Python NervanaGPU.fprop_cuda_conv使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nervanagpu.NervanaGPU
的用法示例。
在下文中一共展示了NervanaGPU.fprop_cuda_conv方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run
# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import fprop_cuda_conv [as 别名]
def run():
ng = NervanaGPU(stochastic_round=False)
dt = np.float32
# N: Number of images in mini-batch
# C: Number of input feature maps
# K: Number of output feature maps
# D: Depth of input image
# H: Height of input image
# W: Width of input image
# T: Depth of filter kernel
# R: Height of filter kernel
# S: Width of filter kernel
#
# * images: (numColors, imgSizeY, imgSizeX, numImages) with stride given
# * filters: (numColors, filterPixels, numFilters) if conv
# * (numModules, numColors, filterPixels, numFilters) otherwise
# *
# * targets: (numFilters, numModulesY, numModulesX, numImages)
N = 128
C = 3
K = 64
D = 1
H = 64
W = 64
T = 1
R = 8
S = 8
pad_h = pad_w = 0
str_h = str_w = 4
layer = ng.conv_layer(dt, N, C, K,
D=D, H=H, W=W,
T=T, R=R, S=S,
pad_d=0, pad_h=pad_h, pad_w=pad_w,
str_d=1, str_h=str_h, str_w=str_w,
grid_P=0, grid_Q=0, update_size=None)
numImages = N
numFilters = K
numModulesY = int(math.ceil(float(H - R + 1 + 2*pad_h) / str_h))
numModulesX = int(math.ceil(float(W - S + 1 + 2*pad_w) / str_w))
print "Num Modules ", numModulesX, numModulesY
# Set up images, filters, and outputs
# imgd = np.loadtxt("im1.txt")
# img = np.zeros((64, 64, 3))
# print imgd.shape
# for i in range(3):
# img[:, :, i] = imgd[i*64:(i+1)*64, :]
# hostImages = np.tile(img)
hostImages = np.random.rand(C, H, W, N)
hostFilters = np.random.uniform(low=0.0, high=1.0, size=(C, S*R, numFilters)) #np.ones((C, S*R, numFilters)) #
hostOutputs = np.zeros((numFilters, numModulesY, numModulesX, N))
print "Input sum", np.sum(hostImages)
# Run cc2 kernel
devI = ng.array(hostImages, dtype=dt)
devF = ng.array(hostFilters, dtype=dt)
devO = ng.array(hostOutputs, dtype=dt)
ng.fprop_cuda_conv(layer, devI, devF, devO)
print "CC2 input sum: ", np.sum(devI.asnumpyarray())
print "CC2 output sum: ", np.sum(devO.asnumpyarray())
# Run maxwel kernel
# images: (C * H * W, N)
# filters: (C * S * R , numFilters)
# outputs: (numFilters * numModulesX * numModulesY, N)
devI = ng.array(hostImages.reshape((C*H*W, N)), dtype=dt)
devF = ng.array(hostFilters.reshape((C*S*R, numFilters)), dtype=dt)
devO2 = ng.array(hostOutputs.reshape(numFilters*numModulesX*numModulesY, N), dtype=dt)
ng.fprop_conv(layer, devI, devF, devO2)
print "NG input sum: ", np.sum(devI.asnumpyarray())
print "NG output sum: ", np.sum(devO2.asnumpyarray())
hostOutputs1 = np.reshape(devO.asnumpyarray(), devO2.shape)
hostOutputs2 = devO2.asnumpyarray()
for i in xrange(hostOutputs1.shape[0]):
for j in xrange(hostOutputs1.shape[1]):
assert(abs(hostOutputs1[i, j] - hostOutputs2[i, j]) < 1e-4)