当前位置: 首页>>代码示例>>Python>>正文


Python NervanaGPU.empty方法代码示例

本文整理汇总了Python中nervanagpu.NervanaGPU.empty方法的典型用法代码示例。如果您正苦于以下问题:Python NervanaGPU.empty方法的具体用法?Python NervanaGPU.empty怎么用?Python NervanaGPU.empty使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nervanagpu.NervanaGPU的用法示例。


在下文中一共展示了NervanaGPU.empty方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: GPU

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
class GPU(Backend):
    """
    Sets up a NervanaGPU based backend for matrix operations.
    Note that some functions defined in the generic Backend class, such as
    cross-map pooling and normalization and adaDelta, are not implemented for
    this backend.
    """
    default_dtype = np.float32

    def __init__(self, rng_seed, stochastic_round=False, device_id=0):
        """
        Build the backend around a new NervanaGPU handle and seed the RNG.

        Arguments:
            rng_seed: value stored as self.rng_seed and then used by
                      rng_init() to seed numpy's random number generator.
            stochastic_round (bool, optional): forwarded unchanged to the
                                               NervanaGPU constructor.
                                               Defaults to False.
            device_id (int, optional): stored as self.device_id; a value of
                                       None is recorded as 0.  Defaults to 0.
        """
        gpu_handle = NervanaGPU(stochastic_round=stochastic_round)
        self.ng = gpu_handle
        logger.info("Initialized NervanaGPU with stochastic_round=%s",
                    stochastic_round)
        # rng_init() looks the seed up on the instance, so store it first.
        self.rng_seed = rng_seed
        self.rng_init()
        if device_id is None:
            device_id = 0
        self.device_id = device_id

    def __getstate__(self):
        """
        Defines what and how we go about serializing an instance of this class.

        The live instance is not modified: its mem_pool stays allocated and
        usable after pickling.  Only the returned copy has the pool stripped.

        Returns:
            dict: A shallow copy of self.__dict__ in which the mem_pool (which
                  is on device and cannot be serialized) is replaced by None.
                  When a pool was allocated, a 'mem_pool_pickle' entry holds
                  the shape and dtype needed to allocate it again on load.
        """
        state = self.__dict__.copy()
        mem_pool = getattr(self, 'mem_pool', None)
        if mem_pool is not None:
            state['mem_pool_pickle'] = {
                'shape': mem_pool.shape,
                # init_mempool accepts any dtype, so record the pool's own
                # dtype when it exposes one instead of assuming float32.
                # NOTE(review): assumes the tensor's dtype attribute is
                # accepted by self.ng.empty -- confirm against nervanagpu.
                'dtype': getattr(mem_pool, 'dtype', np.float32),
            }
            state['mem_pool'] = None

        return state

    def __setstate__(self, state):
        """
        Defines how we go about deserializing into an instance of this class.

        Arguments:
            state (dict): The full contents of the backend class instance,
                          except for the mem_pool which is on device and
                          cannot be serialized.  If it carries a
                          'mem_pool_pickle' entry (with 'shape' and 'dtype'
                          keys), a new pool of that shape and dtype is
                          allocated through self.ng.empty.
        """
        self.__dict__.update(state)
        pool_spec = self.__dict__.get('mem_pool_pickle')
        if pool_spec is None:
            # The instance was pickled before any pool was allocated, so there
            # is nothing to rebuild.  Reading self.mem_pool_pickle here would
            # raise AttributeError.
            return
        self.mem_pool = self.ng.empty(pool_spec['shape'],
                                      dtype=pool_spec['dtype'])

    def init_mempool(self, shape, dtype=default_dtype):
        """
        Reserve the memory pool used for temporary storage.

        Arguments:
            shape: dimensions of the pool, passed through to self.ng.empty.
            dtype (optional): element type of the pool.  Defaults to the
                              class-level default_dtype.
        """
        pool = self.ng.empty(shape, dtype=dtype)
        self.mem_pool = pool

    def alloc_host_mem(self, shape, dtype):
        """
        Allocate host memory through drv.pagelocked_empty.

        Arguments:
            shape: dimensions of the array to allocate.
            dtype: element type of the array to allocate.

        Returns:
            The object produced by drv.pagelocked_empty, requested with
            order "C" and mem_flags 0.
        """
        host_buffer = drv.pagelocked_empty(shape, dtype, order="C",
                                           mem_flags=0)
        return host_buffer

    def create_stream(self):
        """
        Create a new drv.Stream.

        Returns:
            The newly constructed drv.Stream object.
        """
        new_stream = drv.Stream()
        return new_stream

    def async_copy(self, dest, src, stream=None):
        """
        Issue a copy of src into dest via drv.memcpy_htod_async.

        Arguments:
            dest: object whose gpudata attribute is the copy destination.
            src: source buffer, handed to the driver call unchanged.
            stream (optional): passed as the third argument of the driver
                               call.  Defaults to None.
        """
        copy_fn = drv.memcpy_htod_async
        copy_fn(dest.gpudata, src, stream)

    def rng_init(self):
        """
        Initialize and seed the pseudo random number generator.  Random
        numbers are generated on the host using numpy, then transferred to
        device.

        If the instance has an rng_seed attribute, that value is logged and
        used as the seed; otherwise numpy is seeded with None.
        """
        has_seed = 'rng_seed' in self.__dict__
        seed_value = self.rng_seed if has_seed else None
        if has_seed:
            logger.info("Seeding random number generator with: %s",
                        str(seed_value))
        np.random.seed(seed_value)

    def flop_timing_init(self, decorate_fc, decorate_conv, decorate_ew):
        """
        Set up FLOP timing.  Creates the start/end events and a FlopsDecorator
        that wraps the named MOP calls so elapsed time and number of
        operations can be recorded.

        Arguments:
           decorate_fc (list): function names of the fully connected layer
                               forward/backward/update calls to time.
           decorate_conv (list): function names of the convolutional layer
                                 forward/backward/update calls to time.
           decorate_ew (list): function names of the element-wise calls to
                               time.

        Notes:
            Must be called before a FLOP timer is started for the first time.
        """
        begin_event = drv.Event()
        self.start = begin_event
        finish_event = drv.Event()
        self.end = finish_event

        timer = FlopsDecorator(self)
        self.flop_timer = timer
        targets = {'decorate_fc': decorate_fc,
                   'decorate_conv': decorate_conv,
                   'decorate_ew': decorate_ew}
        timer.decorate(**targets)

    def flop_timinig_start(self):
        """
        Start a new FLOP timer.
#.........这里部分代码省略.........
开发者ID:YouVentures,项目名称:neon,代码行数:103,代码来源:gpu.py

示例2: in

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
    for K, C, N in ((3072,3072*1,32),(3072,3072*1,64),(3072,3072*1,96),(3072,3072*1,128),
                    (3072,3072*2,32),(3072,3072*2,64),(3072,3072*2,96),(3072,3072*2,128),
                    (3072,3072*3,32),(3072,3072*3,64),(3072,3072*3,96),(3072,3072*3,128),
                    (3072,3072*4,32),(3072,3072*4,64),(3072,3072*4,96),(3072,3072*4,128),): 
                    #(3072,3072,32+128*0),(3072,3072,64+128*0),(3072,3072,96+128*0),(3072,3072,128+128*0),
                    #(3072,3072,32+128*1),(3072,3072,64+128*1),(3072,3072,96+128*1),(3072,3072,128+128*1),
                    #(3072,3072,32+128*2),(3072,3072,64+128*2),(3072,3072,96+128*2),(3072,3072,128+128*2),
                    #(3072,3072,32+128*3),(3072,3072,64+128*3),(3072,3072,96+128*3),(3072,3072,128+128*3),): 
        for op,  dimA,  dimB,  dimC in (
          ("nn", (K,C), (C,N), (K,N) ),  # fprop
          ("tn", (K,C), (K,N), (C,N) ),  # bprop
          ("nt", (K,N), (C,N), (K,C) )): # update

            repeat = 5000 if C <= 3072 else 500

            devA1 = ng.empty(dimA, dtype=dtype)
            devB1 = ng.empty(dimB, dtype=dtype)
            devC1 = ng.empty(dimC, dtype=dtype)

            # fill with uniform randoms from -1 to 1
            devA1[:] = 2 * (.5 - ng.rand())
            devB1[:] = 2 * (.5 - ng.rand())

            # just alias if same dtype
            if dtype is np.float32:
                devA2 = devA1
                devB2 = devB1
            # otherwise copy
            else:
                devA2 = ng.empty(dimA, dtype=np.float32)
                devB2 = ng.empty(dimB, dtype=np.float32)
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:33,代码来源:cublas2.py

示例3: slicable

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
# Host-side input: uniform samples in [0.0, 9.4), rounded through float16 and
# then stored as float32.  slicable(), dimI, dimO, pool, ng, dtype and repeat
# are all defined outside this excerpt.
cpuI = np.random.uniform(0.0, 9.4, slicable(dimI,1)).astype(np.float16).astype(np.float32)

# Fill the extra last row of the cpu input with a padding value for the sake
# of numpy: the most negative finite value of cpuI's dtype for max pooling,
# otherwise zero.
if pool.op == "max":
    cpuI[-1,:] = np.finfo(cpuI.dtype).min
else:
    cpuI[-1,:] = 0

# cpu output arrays (cpuO is left uninitialized, cpuB starts at zero)
cpuO = np.empty(dimO, dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)

# give gpu the input array without the padding row (not needed), reshaped
# to dimI
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devO = ng.zeros(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)

# presumably the forward pooling pass from devI into devO -- confirm against
# ng.fprop_pool
ng.fprop_pool(pool, devI, devO, repeat=repeat)

# NOTE(review): devO is passed as the second tensor argument and devB as the
# third; confirm their roles against ng.bprop_pool's signature.
ng.bprop_pool(pool, devI, devO, devB, repeat=repeat)

def pixel_indices(kj, mt, pr, qs):

    C       = pool.C
    J,T,R,S = pool.JTRS
    D,H,W = pool.DHW
    HW    = H*W
    DHW   = D*H*W
    imax  = C*D*H*W
    idx   = []
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:32,代码来源:pool_test.py

示例4: bprop

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
# bprop(nn): NK   x KC   = NC
# updat(tn): NK^T x NC   = KC

repeat = 2000


for K, C, N in ((3072,3072,32),):

    total  = 0

    for op,  dimA,  dimB,  dimC in (
      ("nn", (K,C), (C,N), (K,N) ),   # fprop
      ("tn", (K,C), (K,N), (C,N) ),   # bprop
      ("nt", (K,N), (C,N), (K,C) ),): # update

        devA = ng.empty(dimA, dtype=np.float32)
        devB = ng.empty(dimB, dtype=np.float32)
        devC = ng.empty(dimC, dtype=np.float32)

        # fill with uniform randoms from -1 to 1
        devA[:] = 2 * (.5 - ng.rand())
        devB[:] = 2 * (.5 - ng.rand())

        total += cublas_dot(op, devA, devB, devC, repeat=repeat, warmup=True)

    print "N2 Total: ", total
    total = 0

    for op,  dimA,  dimB,  dimC in (
      ("nt", (N,C), (K,C), (N,K) ),   # fprop
      ("nn", (N,K), (K,C), (N,C) ),   # bprop
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:33,代码来源:minibatch_layout_diff.py

示例5: else

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
            dimA = (m,k) if op[0] == 'n' else (k,m)
            dimB = (k,n) if op[1] == 'n' else (n,k)
            dimC = (m,n)

            if data_type == "All Ones":
                cpuA = np.ones(dimA, dtype=dtype).astype(np.float32)
                cpuB = np.ones(dimB, dtype=dtype).astype(np.float32)
                #cpuB = np.identity(n, dtype=np.float32)
            else:
                cpuA = np.random.uniform(-1.0, 1.0, dimA).astype(np.float32)
                cpuB = np.random.uniform(-1.0, 1.0, dimB).astype(np.float32)

            devA = ng.array(cpuA, dtype=dtype)
            devB = ng.array(cpuB, dtype=dtype)
            devC = ng.empty(dimC, dtype=dtype)

            if op[0] == 't': cpuA, devA = cpuA.T, devA.T
            if op[1] == 't': cpuB, devB = cpuB.T, devB.T

            ng.dot(devA, devB, devC, repeat=repeat)

            if cpu:

                cpuC = np.dot(cpuA, cpuB)

                cpuD = devC.get()
                diff = np.absolute(cpuC - cpuD)

                print diff.max()
                print cpuD[::max(m//4,1),::max(n//4,1)]
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:32,代码来源:gemm_test2.py

示例6: type

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
                inception = True

            # find the size of the largest buffers so they can be shared
            if layer.sizeF > max_weights:
                max_weights = layer.sizeF
                max_weight_layer = layer

            if layer.sizeI > max_deltas and type(prev_layer) is not DataLayer:
                max_deltas = layer.sizeI
                max_delta_layer = layer

            prev_layer = layer
            layers.append(layer)

        # Init shared buffers (assumes consistent dtype for now)
        shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
        shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
        if inception:
            shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))
            shared_deltas.append(ng.empty(max_delta_layer.dimI, dtype=max_delta_layer.dtype))

        shared_updates = ng.empty(max_weight_layer.dimF, dtype=np.float32)

        for i, layer in enumerate(layers):
            print(layer)

            # Intitalize buffers.  Alernate shared delta buffer.
            # One layer can't have the same buffer for both error in and error out.
            layer.init_activations()
            layer.init_weights(shared=shared_updates, zeros=zeros)
            if i > 1:
开发者ID:3upperm2n,项目名称:convnet-benchmarks,代码行数:33,代码来源:convnet-benchmarks.py

示例7: cudnn.py 代码片段

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
    N,C,K = conv.NCK
    D,H,W = conv.DHW
    T,R,S = conv.TRS
    M,P,Q = conv.MPQ
    pad_d, pad_h, pad_w = conv.padding
    str_d, str_h, str_w = conv.strides
    alpha, beta = (1.0, 0.0)

    dimI = conv.dimI2
    dimF = conv.dimF2
    dimO = conv.dimO2

    print "cudnn:"

    cuI = ng.empty(dimI[::-1], dtype=np.float32)
    cuF = ng.empty(dimF[::-1], dtype=np.float32)
    cuE = ng.empty(dimO[::-1], dtype=np.float32)
    cuB = ng.empty(dimI[::-1], dtype=np.float32)
    cuU = ng.empty(dimF[::-1], dtype=np.float32)
    cuO = ng.empty(dimO[::-1], dtype=np.float32)
    cuI[:] = 2 * (.5 - ng.rand())
    cuF[:] = 2 * (.5 - ng.rand())
    cuE[:] = 2 * (.5 - ng.rand())

    #print drv.mem_get_info()

    I_data = ctypes.c_void_p(int(cuI.gpudata))
    F_data = ctypes.c_void_p(int(cuF.gpudata))
    O_data = ctypes.c_void_p(int(cuO.gpudata))
    E_data = ctypes.c_void_p(int(cuE.gpudata))
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:32,代码来源:cudnn.py

示例8: sorted

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
            layers.append(layer)

            # find the size of the largest buffers so they can be shared
            if layer.sizeF > max_weights:
                max_weights = layer.sizeF
                max_weight_layer = layer

            if layer.sizeO > max_deltas:
                max_deltas = layer.sizeO
                max_delta_layer = layer

        # for layer in sorted(layers, key=lambda l: l.sizeO, reverse=True):
        #     print("%d %s" % (layer.sizeO, layer))

        # Init shared buffers (assumes consistent dtype for now)
        shared_deltas[0] = ng.empty(max_delta_layer.dimO2,  dtype=max_delta_layer.dtype)
        shared_deltas[1] = ng.empty(max_delta_layer.dimO2,  dtype=max_delta_layer.dtype)
        shared_weights   = ng.empty(max_weight_layer.dimF2, dtype=max_weight_layer.dtype)

        prev_layer = None
        delta = False
        for layer in layers:

            print(layer)

            # Intitalize buffers.  Alernate shared delta buffer.
            # One layer can't have the same buffer for both error in and error out.
            layer.init_activations()
            layer.init_weights(shared=shared_weights)
            layer.init_deltas(shared=shared_deltas[delta])
开发者ID:ekelsen,项目名称:nervanagpu,代码行数:32,代码来源:convnet-benchmarks.py

示例9: batched_dot_test.py 代码片段

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
    dimO = (X,N,K)

# Build the host-side float32 arrays: all ones when `ones` is set, otherwise
# uniform samples in [-1.0, 1.0) rounded through `dtype` first.
# ones, dimI, dimO, dimW, dtype, ng, Nin, repeat and size are defined outside
# this excerpt.
if ones:
    cpuI = np.ones(dimI, dtype=np.float32)
    cpuE = np.ones(dimO, dtype=np.float32)
    cpuW = np.ones(dimW, dtype=np.float32)
else:
    cpuI = np.random.uniform(-1.0, 1.0, dimI).astype(dtype).astype(np.float32)
    cpuE = np.random.uniform(-1.0, 1.0, dimO).astype(dtype).astype(np.float32)
    cpuW = np.random.uniform(-1.0, 1.0, dimW).astype(dtype).astype(np.float32)

# Device arrays created from the host arrays, in the dtype under test.
devI = ng.array(cpuI, dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)
devW = ng.array(cpuW, dtype=dtype)

# Device result buffers, one per operation below; contents are not
# initialized here.
devO = ng.empty(dimO, dtype=dtype)
devB = ng.empty(dimI, dtype=dtype)
devU = ng.empty(dimW, dtype=dtype)

# Run the three batched products.  The Nin flag selects which operands are
# transposed and in which order they are multiplied.
if Nin:
    ng.batched_dot(devW,   devI,   devO, repeat=repeat, size=size) # fprop
    ng.batched_dot(devW.T, devE,   devB, repeat=repeat, size=size) # bprop
    ng.batched_dot(devE,   devI.T, devU, repeat=repeat, size=size) # update
else:
    ng.batched_dot(devI,   devW.T, devO, repeat=repeat, size=size) # fprop
    ng.batched_dot(devE,   devW,   devB, repeat=repeat, size=size) # bprop
    ng.batched_dot(devE.T, devI,   devU, repeat=repeat, size=size) # update

if cpu:

    cpuO = np.empty(dimO, dtype=np.float32)
开发者ID:chagge,项目名称:nervanagpu,代码行数:33,代码来源:batched_dot_test.py

示例10: padding

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import empty [as 别名]
# Zero the last row of the cpu input; that row is dropped again before the
# data is handed to the gpu below.  cpuI, cpuF, cpuE, dimI, dimF, dimO,
# slicable(), conv, ng, dtype, ops and repeat are defined outside this
# excerpt.
cpuI[-1,:] = 0.0

# cpu output arrays, all zero-initialized
cpuO = np.zeros(dimO,             dtype=np.float32)
cpuB = np.zeros(slicable(dimI,1), dtype=np.float32)
cpuU = np.zeros(slicable(dimF),   dtype=np.float32)

# give gpu the input array without zero padding (not needed)
devI = ng.array(cpuI[:-1,:].reshape(dimI), dtype=dtype)
devF = ng.array(cpuF.reshape(dimF), dtype=dtype)
devE = ng.array(cpuE, dtype=dtype)

# Placeholders so the three names exist even for operations not listed in ops.
devO = devB = devU = 0

# Each requested operation allocates its own output buffer and then runs.
if "fprop"  in ops:
    devO = ng.empty(dimO, dtype=dtype)
    ng.fprop_conv(conv,  devI, devF, devO, alpha=1.0, repeat=repeat)

if "bprop"  in ops:
    devB = ng.empty(dimI, dtype=dtype)
    ng.bprop_conv(conv,  devF, devE, devB, alpha=1.0, repeat=repeat)

if "update" in ops:
    devU = ng.empty(dimF, dtype=dtype)
    ng.update_conv(conv, devI, devE, devU, alpha=1.0, repeat=repeat)


def pixel_indices(mt, pr, qs):

    T,R,S = conv.TRS
    D,H,W = conv.DHW
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:33,代码来源:conv_test.py


注:本文中的nervanagpu.NervanaGPU.empty方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。