当前位置: 首页>>代码示例>>Python>>正文


Python NervanaGPU.dot方法代码示例

本文整理汇总了Python中nervanagpu.NervanaGPU.dot方法的典型用法代码示例。如果您正苦于以下问题:Python NervanaGPU.dot方法的具体用法?Python NervanaGPU.dot怎么用?Python NervanaGPU.dot使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nervanagpu.NervanaGPU的用法示例。


在下文中一共展示了NervanaGPU.dot方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: max

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]
                devA2 = ng.empty(dimA, dtype=np.float32)
                devB2 = ng.empty(dimB, dtype=np.float32)
                devA2[:] = devA1
                devB2[:] = devB1

            devC2 = ng.empty(dimC, dtype=np.float32)

            if op[0] == 't': devA1, devA2 = devA1.T, devA2.T
            if op[1] == 't': devB1, devB2 = devB1.T, devB2.T

            glops16 = 0
            glops32 = 0
            glops64 = 0
            if op == "tn" and dtype is np.float16:
                # Experimental 128x16 gemm kernel
                glops16 = ng.dot(devA1, devB1, devC1, repeat=repeat, size=16)
            if op != 'nt':
                glops32 = ng.dot(devA1, devB1, devC1, repeat=repeat, size=32)
                glops64 = ng.dot(devA1, devB1, devC1, repeat=repeat, size=64)
            glops128 = ng.dot(devA1, devB1, devC1, repeat=repeat, size=128)

            glops = max(glops16, glops32, glops64, glops128)

            if glops16 == glops:
                fastest = 16
            elif glops32 == glops:
                fastest = 32
            elif glops64 == glops:
                fastest = 64
            else:
                fastest = 128
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:33,代码来源:cublas2.py

示例2: GPU

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]

#.........这里部分代码省略.........

    def uniform(self, low=0.0, high=1.0, shape=1, dtype=default_dtype,
                persist_values=True, name=None, allocator=drv.mem_alloc):
        """
        Draw samples from a uniform distribution on the host and wrap the
        result in a GPUTensor.

        NOTE: dtype=None is not supported and will likely raise downstream.
        """
        host_ary = np.random.uniform(low, high, shape)
        tensor = GPUTensor(host_ary.shape, dtype, allocator=allocator,
                           name=name, rounding=self.ng.round_mode)
        return tensor.set(host_ary)

    def normal(self, loc=0.0, scale=1.0, size=1, dtype=default_dtype,
               persist_values=True, name=None, allocator=drv.mem_alloc):
        """
        Draw samples from a Gaussian (normal) distribution on the host and
        wrap the result in a GPUTensor.
        """
        host_ary = np.random.normal(loc, scale, size)
        tensor = GPUTensor(host_ary.shape, dtype, allocator=allocator,
                           name=name, rounding=self.ng.round_mode)
        return tensor.set(host_ary)

    def fprop_fc(self, out, inputs, weights, layer=None):
        """
        Forward propagate a fully connected layer: out = weights . inputs.

        Arguments:
            out (GPUTensor): destination for the forward propagated
                             pre-activations (before any activation function).
            inputs (GPUTensor): dataset inputs for the first layer, or the
                                previous layer's outputs otherwise.
            weights (GPUTensor): this layer's weight coefficients.
            layer (Layer): the layer object (unused here).
        """
        self.ng.dot(weights, inputs, out)

    def bprop_fc(self, out, weights, deltas, layer=None):
        """
        Backward propagate a fully connected layer: out = weights.T . deltas.

        Arguments:
            out (GPUTensor): destination for the back-propagated errors.
            weights (GPUTensor): this layer's weight coefficients.
            deltas (GPUTensor): the error values for this layer.
            layer (Layer): the layer object (unused here).
        """
        self.ng.dot(weights.T, deltas, out)

    def update_fc(self, out, inputs, deltas, layer=None):
        """
        Compute the weight gradient of a fully connected layer:
        out = deltas . inputs.T.

        Arguments:
            out (GPUTensor): destination for the gradient values.
            inputs (GPUTensor): dataset inputs for the first layer, or the
                                previous layer's outputs otherwise.
            deltas (GPUTensor): the error values for this layer.
            layer (Layer): the layer object (unused here).
        """
        self.ng.dot(deltas, inputs.T, out)

    def fprop_conv(self, out, inputs, weights, ofmshape, ofmsize, ofmlocs,
                   ifmshape, links, nifm, padding, stride, ngroups, fpropbuf,
                   local=False):
        """
        Forward propagate the inputs of a convolutional network layer to
        produce output pre-activations (ready for transformation by an
开发者ID:YouVentures,项目名称:neon,代码行数:70,代码来源:gpu.py

示例3: in

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]
                                    devA2[:] = devA1
                                    devB2[:] = devB1

                                devC2    = ng.empty(dimC, dtype=np.float32)
                                # devC2    = devC2s.share(dimC, dtype=np.float32)
                                devC2[:] = devC1

                                if op[0] == 't': devA1, devA2 = devA1.T, devA2.T
                                if op[1] == 't': devB1, devB2 = devB1.T, devB2.T

                                for tile in (32,64,128):
                                    if op == 'nt' and tile != 128:
                                        continue
                                    try: 

                                        ng.dot(devA1, devB1, devC1, alpha=alpha, beta=beta, size=tile)
                                        context.synchronize()

                                        cublas_dot(devA2, devB2, devC2, alpha=alpha, beta=beta)

                                        partial1 = ng.empty((devC1.shape[0],1), dtype=np.float32)
                                        partial2 = partial1[0:1,0:1]

                                        if ng.min(ng.finite(devC1), partial=partial1, out=partial2).get()[0,0] == 0.0:
                                            print("Error: NaN KCN: (%d,%d,%d) ab: (%f,%f) dtype: %d" %
                                                  (K,C,N, alpha,beta, itemsize))
                                            exit()

                                        diff = ng.max(abs(devC2 - devC1), partial=partial1, out=partial2).get()[0,0]
                                        mean = ng.mean(abs(devC2), partial=partial1, out=partial2).get()[0,0]
                                        pctErr = 100 * diff / mean
开发者ID:leonardt,项目名称:nervanagpu,代码行数:33,代码来源:cublas_test.py

示例4: exit

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]
            if data_type == "All Ones":
                cpuA = np.ones(dimA, dtype=dtype).astype(np.float32)
                cpuB = np.ones(dimB, dtype=dtype).astype(np.float32)
                #cpuB = np.identity(n, dtype=np.float32)
            else:
                cpuA = np.random.uniform(-1.0, 1.0, dimA).astype(np.float32)
                cpuB = np.random.uniform(-1.0, 1.0, dimB).astype(np.float32)

            devA = ng.array(cpuA, dtype=dtype)
            devB = ng.array(cpuB, dtype=dtype)
            devC = ng.empty(dimC, dtype=dtype)

            if op[0] == 't': cpuA, devA = cpuA.T, devA.T
            if op[1] == 't': cpuB, devB = cpuB.T, devB.T

            ng.dot(devA, devB, devC, repeat=repeat)

            if cpu:

                cpuC = np.dot(cpuA, cpuB)

                cpuD = devC.get()
                diff = np.absolute(cpuC - cpuD)

                print diff.max()
                print cpuD[::max(m//4,1),::max(n//4,1)]
                print cpuC[::max(m//4,1),::max(n//4,1)]
                print diff[::max(m//4,1),::max(n//4,1)]

                # print cpuD
                # exit()
开发者ID:KayneWest,项目名称:nervanagpu,代码行数:33,代码来源:gemm_test2.py

示例5: cublas_dot

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]
                    devA2 = devA1
                    devB2 = devB1
                # otherwise copy
                else:
                    devA2 = ng.empty(dimA, dtype=np.float32)
                    devB2 = ng.empty(dimB, dtype=np.float32)
                    devA2[:] = devA1
                    devB2[:] = devB1

                devC2    = ng.empty(dimC, dtype=np.float32)
                devC2[:] = devC1

                if op[0] == 't': devA1, devA2 = devA1.T, devA2.T
                if op[1] == 't': devB1, devB2 = devB1.T, devB2.T

                ng.dot(devA1, devB1, devC1, alpha=alpha, beta=beta, repeat=repeat)

                cublas_dot(devA2, devB2, devC2, alpha=alpha, beta=beta, repeat=repeat)

                partial1 = ng.empty((devC1.shape[0],1), dtype=np.float32)
                partial2 = partial1[0:1,0:1]

                diff = ng.max(abs(devC2 - devC1), partial=partial1, out=partial2).get()[0,0]
                mean = ng.mean(abs(devC2), partial=partial1, out=partial2).get()[0,0]

                #if diff > .1:
                print("Error: %.3f%%" % (100 * diff / mean))

                print("--------------------------------------------------------------------------------")

cublas.cublasDestroy(handle)
开发者ID:leonardt,项目名称:nervanagpu,代码行数:33,代码来源:cublas.py

示例6: min

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]
                    cpuA = np.random.uniform(-1.0, 1.0, dimA).astype(np.float32)
                    cpuB = np.random.uniform(-1.0, 1.0, dimB).astype(np.float32)
                    # cpuB = np.identity(n, dtype=dtype)

                    devA = ng.array(cpuA, dtype=dtype)
                    devB = ng.array(cpuB, dtype=dtype)
                    devC = ng.empty(dimC, dtype=dtype)

                    # repeat = min(int(50.0 * 4096**3 / (m * n * k)), 1000)

                    if op[0] == "t":
                        cpuA, devA = cpuA.T, devA.T
                    if op[1] == "t":
                        cpuB, devB = cpuB.T, devB.T

                    ng.dot(devA, devB, devC, repeat=1)

                    # context.synchronize()

                    cpuC = np.dot(cpuA, cpuB)

                    cpuD = devC.get()
                    diff = np.absolute(cpuC - cpuD)
                    max_diff = diff.max()
                    print(max_diff, cpuD.max())
                    if max_diff > 0.1 or max_diff != max_diff:
                        # print(m, n, k, max_diff)
                        print(cpuD[:: max(m // 16, 1), :: max(n // 16, 1)])
                        print(cpuC[:: max(m // 16, 1), :: max(n // 16, 1)])
                        print(diff[:: max(m // 16, 1), :: max(n // 16, 1)])
                        exit()
开发者ID:chagge,项目名称:nervanagpu,代码行数:33,代码来源:gemm_test.py

示例7: MGPU

# 需要导入模块: from nervanagpu import NervanaGPU [as 别名]
# 或者: from nervanagpu.NervanaGPU import dot [as 别名]

#.........这里部分代码省略.........
        assert hbuf.size == dbuf.size * dbuf.num_dev
        assert isinstance(dbuf, MGPUTensor)
        assert hbuf.dtype == dbuf.dtype
        ndata = dbuf.size
        starts = [i * ndata for i in range(self.num_dev)]

        for dest, strm, ctx, doff in zip(dbuf.tlist, self.strms, self.ctxs,
                                         starts):
            src = hbuf.reshape((hbuf.size))[doff:(doff + ndata)]
            ctx.push()
            drv.memcpy_htod_async(dest.ptr, src, strm)
            ctx.pop()

        self.synchronize()

    def fprop_fc(self, out, inputs, weights, layer=None):
        """
        Fully connected forward pass across multiple devices.

        The weights are shards (one per device) while the activations are
        replicas; each device computes its fragment of the output into the
        layer's pooled buffer (shaped nout/num_dev x mbsz), and the
        fragments are then gathered into the replicated ``out``.
        """
        frag_buf = layer.mempool[0]
        assert frag_buf.shape == (weights.shape[0], inputs.shape[1])

        if layer.use_biases:
            biases = layer.biases.tlist
        else:
            biases = [None] * self.num_dev

        per_dev = zip(frag_buf.tlist, inputs.tlist, weights.tlist, biases,
                      self.strms, self.ctxs)
        for frag, acts, wts, bias, stream, context in per_dev:
            context.push()
            self.ng.stream = stream
            self.ng.dot(wts, acts, frag)
            if layer.use_biases:
                self.ng.add(frag, bias, out=frag)
            context.pop()

        # Should be safe to skip an explicit synchronize: each fragment is
        # computed on the same stream that originates the gather copy.
        self.fragment_to_replica(frag_buf, out)

    def bprop_fc(self, out, weights, deltas, layer=None):
        """
        Backward propagate the error through a fully connected network layer.

        Arguments:
            out (GPUTensor): Where to store the backward propagated errors.
            weights (GPUTensor): The weight coefficient values for this layer.
            deltas (GPUTensor): The error values for this layer
            layer (Layer): The layer object.
        """
        scratch = layer.mempool[1]
        shard_rows = weights.shape[0]
        offsets = [d * shard_rows for d in range(self.num_dev)]
        assert out.shape == (weights.shape[1], deltas.shape[1])
        assert scratch.shape == out.shape

        per_dev = zip(out.tlist, deltas.tlist, weights.tlist,
                      self.strms, self.ctxs, offsets)
        for err_out, delta, wts, stream, context, off in per_dev:
            context.push()
            self.ng.stream = stream
            # Each shard multiplies its weight slice against the matching
            # row band of the replicated deltas.
            self.ng.dot(wts.T, delta[off:(off + shard_rows)], err_out)
            context.pop()
开发者ID:neuroidss,项目名称:neon,代码行数:70,代码来源:mgpu.py


注:本文中的nervanagpu.NervanaGPU.dot方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。