本文整理汇总了Python中veles.memory.Array.map_invalidate方法的典型用法代码示例。如果您正苦于以下问题：Python Array.map_invalidate方法的具体用法？Python Array.map_invalidate怎么用？Python Array.map_invalidate使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类veles.memory.Array的用法示例。
在下文中一共展示了Array.map_invalidate方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Summator
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class Summator(AcceleratedUnit):
    """Adds two vectors elementwise (``output = x + y``).

    ``x`` and ``y`` are demanded attributes. ``output`` is allocated in
    initialize() as a zero array shaped like ``x`` unless it is already set.
    """

    def __init__(self, workflow, **kwargs):
        super(Summator, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.demand("x", "y")

    def initialize(self, device, **kwargs):
        """Allocates ``output`` when it is empty and registers the vectors."""
        super(Summator, self).initialize(device, **kwargs)
        if self.output:
            # A pre-assigned output has to match the shape of x.
            assert self.output.shape == self.x.shape
        else:
            self.output.reset(numpy.zeros_like(self.x.mem))
        self.init_vectors(self.x, self.y, self.output)

    def init_unpickled(self):
        super(Summator, self).init_unpickled()
        self.sources_["summator"] = {}

    def _gpu_init(self):
        """Builds the program and binds the "add_forward" kernel arguments."""
        defines = {"OUTPUT_SIZE": self.output.size}
        program_name = "%s_%d" % (self.__class__.__name__, self.output.size)
        self.build_program(defines, program_name, dtype=self.x.dtype)
        self.assign_kernel("add_forward")
        self.set_args(self.x, self.y, self.output)

    def cuda_init(self):
        """Sets up the kernel and sizes the grid from the suggested block."""
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        n_blocks = int(numpy.ceil(self.output.size / block_size))
        self._global_size = (n_blocks, 1, 1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        """Sets up the kernel with one work item per output element."""
        self._gpu_init()
        self._global_size = (self.output.size, 1, 1)
        self._local_size = None

    def numpy_init(self):
        pass  # nothing to init

    def _gpu_run(self):
        """Unmaps all vectors and launches the kernel."""
        self.unmap_vectors(self.x, self.y, self.output)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        self._gpu_run()

    def ocl_run(self):
        self._gpu_run()

    def numpy_run(self):
        """Computes the sum on the CPU, writing directly into ``output``."""
        self.x.map_read()
        self.y.map_read()
        # output is fully overwritten, so its old contents are not needed
        self.output.map_invalidate()
        numpy.add(self.x.mem, self.y.mem, self.output.mem)
示例2: GDSummator
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class GDSummator(AcceleratedUnit):
    """Gradient descent for Summator.

    Copies ``err_output`` unchanged into both ``err_x`` and ``err_y``.
    """

    def __init__(self, workflow, **kwargs):
        super(GDSummator, self).__init__(workflow, **kwargs)
        self.err_x = Array()
        self.err_y = Array()
        self.demand("err_output")

    def initialize(self, device, **kwargs):
        """(Re)allocates err_x / err_y to follow the shape of err_output."""
        super(GDSummator, self).initialize(device, **kwargs)
        for err in (self.err_x, self.err_y):
            if err:
                # Only the first (batch) dimension is allowed to differ.
                assert err.shape[1:] == self.err_output.shape[1:]
            if not err or err.shape[0] != self.err_output.shape[0]:
                err.reset(numpy.zeros_like(self.err_output.mem))
        self.init_vectors(self.err_x, self.err_y, self.err_output)

    def cuda_init(self):
        pass  # nothing to init

    def ocl_init(self):
        pass  # nothing to init

    def numpy_init(self):
        pass  # nothing to init

    def cuda_run(self):
        """Copies err_output into err_x and err_y on the device."""
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        source = self.err_output.devmem
        for err in (self.err_x, self.err_y):
            err.devmem.from_device_async(source)

    def ocl_run(self):
        """Copies err_output into err_x and err_y via the device queue."""
        self.unmap_vectors(self.err_output, self.err_x, self.err_y)
        for err in (self.err_x, self.err_y):
            self.device.queue_.copy_buffer(
                self.err_output.devmem, err.devmem, 0, 0,
                self.err_output.nbytes, need_event=False)

    def numpy_run(self):
        """Copies err_output into err_x and err_y on the CPU."""
        self.err_output.map_read()
        self.err_x.map_invalidate()
        self.err_y.map_invalidate()
        for err in (self.err_x, self.err_y):
            err.mem[:] = self.err_output.mem[:]
示例3: MemCpy
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class MemCpy(AcceleratedUnit):
    """Copies the demanded ``input`` array into ``output``."""

    def __init__(self, workflow, **kwargs):
        super(MemCpy, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        """Allocates ``output`` when it is missing or has a different size."""
        super(MemCpy, self).initialize(device, **kwargs)
        source = self.input.mem
        needs_alloc = (self.output.mem is None or
                       self.output.mem.size != source.size)
        if needs_alloc:
            self.output.reset()
            self.output.mem = numpy.zeros(source.shape, dtype=source.dtype)
        self.input.initialize(self.device)
        self.output.initialize(self.device)

    def cuda_init(self):
        pass

    def ocl_init(self):
        pass

    def _gpu_run(self):
        """Unmaps both arrays before a device-side copy."""
        self.input.unmap()
        self.output.unmap()

    def ocl_run(self):
        self._gpu_run()
        self.device.queue_.copy_buffer(
            self.input.devmem, self.output.devmem, 0, 0, self.input.nbytes)

    def cuda_run(self):
        self._gpu_run()
        self.output.devmem.from_device_async(self.input.devmem)

    def numpy_run(self):
        """Copies input to output on the CPU."""
        self.input.map_read()
        # output is fully overwritten
        self.output.map_invalidate()
        numpy.copyto(self.output.mem, self.input.mem)
示例4: All2AllSoftmax
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class All2AllSoftmax(All2All):
    """All2All with linear activation and softmax normalization.

    Creates within initialize():
        max_idx

    Updates after run():
        max_idx

    Attributes:
        krn_sm_: kernel for softmax activation calculation.
        max_idx: indexes of element with maximum value for each sample.
    """

    __id__ = "420219fc-3e1a-45b1-87f8-aaa0c1540de4"
    MAPPING = {"softmax"}

    def __init__(self, workflow, **kwargs):
        super(All2AllSoftmax, self).__init__(workflow, **kwargs)
        self.max_idx = Array()
        self.reduce_size = 256

    def init_unpickled(self):
        super(All2AllSoftmax, self).init_unpickled()
        self.krn_sm_ = None
        self._force_gpu_apply_exp = False

    def initialize(self, device, **kwargs):
        """Prepares the softmax kernel defines and allocates ``max_idx``.

        Returns whatever the parent initialize() returned; a truthy value
        is passed through immediately without any further setup.
        """
        sample_size = int(numpy.prod(self.output_sample_shape))
        self.reduce_size = min(self.reduce_size, sample_size)
        self.sources_["all2all/softmax"] = {
            "REDUCE_SIZE": self.reduce_size
        }
        retval = super(All2AllSoftmax, self).initialize(
            device=device, **kwargs)
        if retval:
            return retval

        if self.output.mem.size // self.output.mem.shape[0] <= 1:
            raise error.BadFormatError(
                "Output sample size should be greater than 1 for SoftMax.")

        if not self.max_idx:
            self.max_idx.reset(
                numpy.zeros(self.output.shape[0], dtype=numpy.int32))
        self.max_idx.initialize(self.device)
        return retval

    def numpy_apply_exp(self):
        """Applies softmax to every output sample in place on the CPU.

        The index of each sample's maximum is stored in ``max_idx``. The
        maximum is subtracted before exponentiation.
        """
        self.output.map_write()
        self.max_idx.map_invalidate()
        mem = self.output.mem
        samples = reshape(mem, (mem.shape[0], mem.size // mem.shape[0]))
        for index, sample in enumerate(samples):
            peak = sample.argmax()
            self.max_idx[index] = peak
            sample -= sample[peak]
            numpy.exp(sample, sample)
            sample /= sample.sum()

    def ocl_apply_exp(self):
        """Runs the softmax kernel; ``reduce_size`` work items per sample."""
        self.unmap_vectors(self.output, self.max_idx)
        local_size = (self.reduce_size,)
        global_size = (self.output.shape[0] * self.reduce_size,)
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def cuda_apply_exp(self):
        """Runs the softmax kernel with one block per sample."""
        self.unmap_vectors(self.output, self.max_idx)
        local_size = (self.reduce_size, 1, 1)
        global_size = (self.output.shape[0], 1, 1)
        self.execute_kernel(global_size, local_size, self.krn_sm_)

    def numpy_run(self):
        """Forward propagation from batch on CPU only.
        """
        super(All2AllSoftmax, self).numpy_run()
        # The GPU run methods set this flag and apply the softmax themselves.
        if self._force_gpu_apply_exp:
            return
        self.numpy_apply_exp()

    def ocl_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).ocl_run()
        self.ocl_apply_exp()

    def cuda_run(self):
        """Forward propagation from batch on GPU.
        """
        self._force_gpu_apply_exp = True
        super(All2AllSoftmax, self).cuda_run()
        self.cuda_apply_exp()

    def ocl_init(self):
        """Fetches the "apply_exp" kernel and binds its two buffers."""
        super(All2AllSoftmax, self).ocl_init()
        self.krn_sm_ = self.get_kernel("apply_exp")
        self.krn_sm_.set_args(self.output.devmem, self.max_idx.devmem)
#.........这里部分代码省略.........
示例5: OffsetPooling
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class OffsetPooling(Pooling):
    """Pooling by offset forward propagation.

    Creates within initialize():
        input_offset

    Updates after run():
        input_offset

    Attributes:
        input_offset: offsets in the input where elements are passed through.
    """

    MAPPING = set()
    hide_from_registry = True

    def __init__(self, workflow, **kwargs):
        super(OffsetPooling, self).__init__(workflow, **kwargs)
        self.input_offset = Array()
        self.demand("input")

    def initialize(self, device, **kwargs):
        """(Re)allocates ``input_offset`` as int32 with the output shape."""
        super(OffsetPooling, self).initialize(device=device, **kwargs)
        if self._no_output:
            return

        offsets = self.input_offset
        if offsets:
            # Only the batch dimension may differ from the output.
            assert offsets.shape[1:] == self.output.shape[1:]
        if not offsets or offsets.shape[0] != self.output.shape[0]:
            offsets.reset(numpy.zeros(self.output.shape, dtype=numpy.int32))
        offsets.initialize(self.device)

    def set_args(self, *args):
        """Prepends input, output and input_offset to the kernel args."""
        super(OffsetPooling, self).set_args(
            self.input, self.output, self.input_offset, *args)

    def ocl_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).ocl_run()

    def cuda_run(self):
        self.input_offset.unmap()
        super(OffsetPooling, self).cuda_run()

    def numpy_run(self):
        self.input_offset.map_invalidate()
        super(OffsetPooling, self).numpy_run()

    def numpy_run_cut(self, cut, coords):
        """Picks one element of ``cut`` and records its flat input index.

        The element is chosen by numpy_run_cut_offset(); its flat index in
        ``input`` is stored in ``input_offset`` and its value is returned.
        """
        batch, y1, x1, ch, out_y, out_x = coords
        out_index = numpy.ravel_multi_index(
            (batch, out_y, out_x, ch), self.output.shape)
        cut_index = self.numpy_run_cut_offset(cut, out_index)
        row, col = numpy.unravel_index(cut_index, cut.shape)
        flat_index = numpy.ravel_multi_index(
            (batch, y1 + row, x1 + col, ch), self.input.shape)
        value = numpy.ravel(self.input.mem)[flat_index]
        self.input_offset.mem[batch, out_y, out_x, ch] = flat_index
        return value
示例6: DropoutForward
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class DropoutForward(Forward, Dropout):
    """
    Forward propagation of dropout layer.

    In forward mode the input is copied to the output unchanged; otherwise
    the input is multiplied by a freshly generated mask.
    """

    MIN_RANDOM_STATE = 0
    MAX_RANDOM_STATE = 0x100000000
    MAPPING = {"dropout"}

    def __init__(self, workflow, **kwargs):
        super(DropoutForward, self).__init__(workflow, **kwargs)
        self.mask = Array()  # dropout mask
        self.states = Array()
        self.rand = random_generator.get()

    @Dropout.dropout_ratio.setter
    def dropout_ratio(self, value):
        Dropout.dropout_ratio.fset(self, value)
        # Regenerate the mask once an input is linked.
        if hasattr(self, "input") and self.input is not None:
            self.calc_mask()

    def initialize(self, device, **kwargs):
        """Allocates mask, random states and (if empty) the output."""
        super(DropoutForward, self).initialize(device=device, **kwargs)
        self.mask.mem = numpy.empty_like(self.input.mem)
        states = self.rand.randint(
            low=DropoutForward.MIN_RANDOM_STATE,
            high=DropoutForward.MAX_RANDOM_STATE,
            size=self.input.size * 4)
        self.states.mem = states.astype(numpy.uint32)
        if self.output:
            assert self.output.shape == self.input.shape
        else:
            self.output.reset(numpy.zeros_like(self.input.mem))
        self.init_vectors(self.input, self.output, self.states, self.mask)

    def _gpu_init(self):
        """Builds the program and binds the "dropout_forward" kernel.

        Arguments 1 and 2 are skipped here; _gpu_run() sets them.
        """
        self._threshold_arg_ = numpy.empty(1, dtype=numpy.uint64)
        self._pass_arg_ = numpy.empty(1, dtype=self.input.dtype)
        shape_tag = "x".join(str(x) for x in self.input.shape)
        self.build_program(
            {"OUTPUT_SIZE": self.input.size},
            "%s_%s" % (self.__class__.__name__, shape_tag),
            dtype=self.input.dtype)
        self.assign_kernel("dropout_forward")
        self.set_args(self.input, self.device.skip(2), self.states, self.mask,
                      self.output)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.input.size,)
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        n_blocks = int(numpy.ceil(self.input.size / block_size))
        self._global_size = (n_blocks, 1, 1)
        self._local_size = (block_size, 1, 1)

    def calc_mask(self):
        """Fills the mask with 0 or 1 / (1 - dropout_ratio).

        Values are drawn between -dropout_ratio and 1 - dropout_ratio,
        clipped at zero and rounded up, then scaled.
        """
        leave_ratio = 1.0 - self.dropout_ratio
        mask = self.mask.mem
        self.rand.fill(mask, -self.dropout_ratio, leave_ratio)
        numpy.maximum(mask, 0, mask)
        numpy.ceil(mask, mask)
        mask[:] = mask.astype(self.input.dtype) / leave_ratio

    def numpy_run(self):
        """Applies dropout (or a plain copy in forward mode) on the CPU."""
        self.output.map_invalidate()
        self.input.map_read()
        if self.forward_mode:
            self.output.mem[:] = self.input.mem
            return
        self.mask.map_invalidate()
        self.calc_mask()
        numpy.multiply(self.input.mem.ravel(), self.mask.mem.ravel(),
                       ravel(self.output.mem))

    def _gpu_run(self):
        """Launches the dropout kernel unless running in forward mode.

        Returns:
            True if the caller has to copy input to output itself
            (forward mode), False if the kernel was executed.
        """
        self.unmap_vectors(self.input, self.output)
        if self.forward_mode:
            # Will copy input to output from outside (in cuda_run/ocl_run).
            return True
        self.unmap_vectors(self.states, self.mask)
        self._threshold_arg_[0] = ((1 << 64) - 1.0) * self.dropout_ratio
        self._pass_arg_[0] = 1.0 / (1.0 - self.dropout_ratio)
        self.set_arg(1, self._threshold_arg_)
        self.set_arg(2, self._pass_arg_)
        self.execute_kernel(self._global_size, self._local_size)
        return False

    def ocl_run(self):
        copy_needed = self._gpu_run()
        if copy_needed:
            self.device.queue_.copy_buffer(
                self.input.devmem, self.output.devmem, 0, 0,
                self.output.nbytes, need_event=False)

    def cuda_run(self):
        copy_needed = self._gpu_run()
        if copy_needed:
            self.output.devmem.from_device_async(self.input.devmem)
示例7: MeanDispNormalizer
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class MeanDispNormalizer(AcceleratedUnit, TriviallyDistributable):
    """Normalizes multichannel byte images according to
    dataset mean and dispersion.

    Attributes:
        input: minibatch of images (dtype=numpy.uint8,
               shape[0]=minibatch_size).
        mean: mean image over the dataset (dtype=numpy.uint8).
        rdisp: 1.0 / dispersion over the dataset (float datatype).
        output: normalized float images of the same dtype as rdisp.
    """

    def __init__(self, workflow, **kwargs):
        kwargs.setdefault("view_group", "WORKER")
        super(MeanDispNormalizer, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.global_size = None
        self.local_size = None
        self.demand("input", "mean", "rdisp")

    def init_unpickled(self):
        super(MeanDispNormalizer, self).init_unpickled()
        self.sources_["mean_disp_normalizer"] = {}

    def initialize(self, device, **kwargs):
        """Validates the inputs and allocates ``output`` if it is empty.

        Raises:
            TypeError: input, mean or rdisp is not an Array.
            ValueError: an Array is empty, input has fewer than two
                dimensions, or the sample sizes do not agree.
        """
        super(MeanDispNormalizer, self).initialize(device, **kwargs)

        for arr in (self.input, self.mean, self.rdisp):
            if not isinstance(arr, Array):
                raise TypeError(
                    "veles.memory.Array type expected (got %s)" % type(arr))
            if not arr:
                raise ValueError("Invalid Array state")

        if len(self.input.shape) < 2:
            raise ValueError("input should be at least 2D")

        sample_size = self.mean.size
        sizes_agree = (self.input.sample_size == sample_size and
                       self.rdisp.size == sample_size)
        if not sizes_agree:
            raise ValueError(
                "Sample size of input differs from mean-rdisp size")

        if self.output:
            assert self.output.shape == self.input.shape
        else:
            self.output.reset(numpy.zeros(self.input.shape, self.rdisp.dtype))

        self.init_vectors(self.input, self.mean, self.rdisp, self.output)

    def _gpu_init(self):
        """Builds the program and binds the "normalize_mean_disp" kernel."""
        defines = {
            "input_type": numpy_dtype_to_opencl(self.input.dtype),
            "mean_type": numpy_dtype_to_opencl(self.mean.dtype),
            "SAMPLE_SIZE": self.mean.size
        }
        self.build_program(defines, self.__class__.__name__,
                           dtype=self.rdisp.dtype)
        self.assign_kernel("normalize_mean_disp")
        self.set_args(self.input, self.mean, self.rdisp, self.output)

    def ocl_init(self):
        self._gpu_init()
        self.global_size = [self.mean.size, self.input.shape[0]]

    def cuda_init(self):
        self._gpu_init()
        self.local_size = 1, 1, 1
        self.global_size = self.mean.size, self.input.shape[0], 1

    def _gpu_run(self):
        self.unmap_vectors(self.input, self.mean, self.rdisp, self.output)
        self.execute_kernel(self.global_size, self.local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()

    def numpy_run(self):
        """Computes (input - mean) * rdisp on the CPU in the output dtype."""
        self.input.map_read()
        self.mean.map_read()
        self.rdisp.map_read()
        self.output.map_invalidate()
        dtype = self.output.dtype
        centered = (self.input.matrix.astype(dtype)[:] -
                    self.mean.plain.astype(dtype))
        self.output.matrix[:] = centered * self.rdisp.plain
示例8: EvaluatorMSE
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
#.........这里部分代码省略.........
self._local_size = [block_size]
self._global_size = self._local_size
self._global_size_find_closest_ = lambda: (self.batch_size,)
self._local_size_find_closest = None
def cuda_init(self):
    """Sets the CUDA launch sizes; does nothing when ``testing`` is set.

    The block size comes from _gpu_init(). The find-closest grid size is
    a callable because it reads ``batch_size`` at call time.
    """
    if self.testing:
        return
    threads = self._gpu_init()
    self._local_size = (threads, 1, 1)
    self._global_size = (1, 1, 1)
    self._global_size_find_closest_ = lambda: (self.batch_size, 1, 1)
    self._local_size_find_closest = (1, 1, 1)
def _gpu_run(self):
    """Runs the MSE kernel and, when labels exist, the find-closest kernel.

    Kernel argument 2 receives the batch size and argument 3 the scaling
    factor (1 / batch_size when ``mean`` is set, 1.0 otherwise).
    """
    self.unmap_vectors(self.err_output, self.output, self.target,
                       self.metrics, self.mse)
    batch_size = self.batch_size

    self.krn_constants_i_[0] = batch_size
    self.set_arg(2, self.krn_constants_i_[0:1])
    if self.mean:
        self.krn_constants_f_[0] = 1.0 / self.batch_size
    else:
        self.krn_constants_f_[0] = 1.0
    self.set_arg(3, self.krn_constants_f_[0:1])
    self.execute_kernel(self._global_size, self._local_size)

    if not (self.labels and self.class_targets):
        return
    self.unmap_vectors(self.class_targets, self.labels, self.n_err)
    self.execute_kernel(self._global_size_find_closest_(),
                        self._local_size_find_closest,
                        self.krn_find_closest_)
    # The total sample counter is advanced on the host.
    self.n_err.map_write()
    self.n_err.mem[1] += batch_size
def ocl_run(self):
    """Delegates to the shared GPU implementation."""
    result = self._gpu_run()
    return result
def cuda_run(self):
    """Delegates to the shared GPU implementation."""
    result = self._gpu_run()
    return result
def numpy_run(self):
    """Computes the error, per-sample MSE and metrics on the CPU.

    Only the first ``batch_size`` rows are processed; the remaining rows
    of ``err_output`` and ``mse`` are zeroed. ``metrics`` accumulates the
    MSE sum in [0], the maximum in [1] and the minimum in [2]. When both
    labels and class targets are present, ``n_err`` counts mismatches in
    [0] and processed samples in [1].
    """
    self.output.map_read()
    self.target.map_read()
    self.metrics.map_write()
    self.err_output.map_invalidate()
    self.mse.map_invalidate()

    assert self.output.size == self.target.size == self.err_output.size
    batch_size = self.batch_size

    err_output = self.err_output.matrix[:batch_size]
    assert_addr(err_output, self.err_output.mem)
    output = self.output.matrix[:batch_size]
    assert_addr(output, self.output.mem)
    target = self.target.matrix[:batch_size]
    assert_addr(target, self.target.mem)
    mse = self.mse.mem[:batch_size]
    assert_addr(mse, self.mse.mem)

    err_output[:] = output - target

    if isinstance(self.normalizer, NoneNormalizer):
        denormed_err = err_output
    else:
        # The MSE is measured on denormalized copies of output and target.
        denormed_output = output.copy()
        denormed_target = target.copy()
        self.normalizer.denormalize(denormed_output)
        self.normalizer.denormalize(denormed_target)
        denormed_err = denormed_output - denormed_target

    self.err_output.mem[batch_size:] = 0
    mse[:] = (numpy.square(denormed_err).sum(axis=1) /
              denormed_err.shape[1])
    if self.mean:
        err_output /= batch_size
    if self.root:
        numpy.sqrt(mse, mse)
    self.mse.mem[batch_size:] = 0

    metrics = self.metrics.mem
    metrics[0] += mse.sum()
    metrics[1] = max(metrics[1], mse.max())
    metrics[2] = min(metrics[2], mse.min())

    if not (self.labels and self.class_targets):
        return
    self.class_targets.map_read()
    self.labels.map_read()
    self.n_err.map_write()
    class_targets = self.class_targets.matrix
    labels = self.labels.mem
    for index, sample in enumerate(output):
        # The predicted label is the class target nearest to the sample.
        nearest = numpy.linalg.norm(class_targets - sample,
                                    axis=1).argmin()
        if nearest != labels[index]:
            self.n_err.mem[0] += 1
        self.n_err.mem[1] += 1
def merge_output(self):
    """Stores the current minibatch output into ``merged_output``.

    The first ``batch_size`` rows of ``output`` are written to the rows
    ``offset - batch_size`` .. ``offset`` of ``merged_output``. Unless the
    normalizer is a NoneNormalizer, a denormalized copy is stored.
    """
    batch_size = self.batch_size
    if not isinstance(self.normalizer, NoneNormalizer):
        output = self.output[:batch_size].copy()
        self.normalizer.denormalize(output)
    else:
        # Slice here as well: the target window has exactly batch_size
        # rows, so a larger buffer (partial minibatch) could not be
        # broadcast into it.
        output = self.output.mem[:batch_size]
    self.merged_output[self.offset - batch_size:self.offset] = output
示例9: Forward
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class Forward(ForwardBase):
    """Class for forward propagation units.

    Attributes:
        input: input layer values.
        output: output layer values.
        weights: weights.
        bias: bias.
        weights_stddev: magnitude of the random distribution for weights.
        bias_stddev: magnitude of the random distribution for bias.
        rand: prng.Rand() object for initial weights generation.
    """

    hide_from_registry = True
    MAPPING = set()

    def __init__(self, workflow, **kwargs):
        kwargs.setdefault("view_group", "WORKER")
        super(Forward, self).__init__(workflow, **kwargs)
        self.weights_stddev = kwargs.get("weights_stddev")
        # bias_stddev falls back to weights_stddev when not given
        self.bias_stddev = kwargs.get("bias_stddev", self.weights_stddev)
        self.weights_filling = kwargs.get("weights_filling", "uniform")
        self.bias_filling = kwargs.get("bias_filling", "uniform")
        self.rand = kwargs.get("rand", prng.get())
        self.weights_transposed = kwargs.get("weights_transposed", False)
        self.include_bias = kwargs.get("include_bias", True)
        self.demand("input")
        self.output = Array(shallow_pickle=True)
        self.weights = Array()
        self.bias = Array()
        self.forward_mode = False
        self.exports = ["weights", "bias", "include_bias",
                        "weights_transposed"]

    def package_export(self):
        """Returns a dict of the exported attributes that are not None.

        Array values are mapped for reading and replaced by their ``mem``.
        """
        data = {}
        for attr in self.exports:
            value = getattr(self, attr)
            if value is None:
                continue
            if isinstance(value, Array):
                value.map_read()
                value = value.mem
            data[attr] = value
        return data

    @property
    def forward_mode(self):
        return self._forward_mode

    @forward_mode.setter
    def forward_mode(self, value):
        if not isinstance(value, bool):
            raise TypeError(
                "forward_mode must be boolean (got %s)" % type(value))
        self._forward_mode = value

    def initialize(self, device, **kwargs):
        """Reads ``forward_mode`` from kwargs, then initializes the parent."""
        self.forward_mode = kwargs.get("forward_mode", False)
        super(Forward, self).initialize(device=device, **kwargs)

    def generate_data_for_slave(self, slave):
        """Returns [weights, bias] memory, or None in forward mode.

        An entry stays None when the corresponding Array is empty.
        """
        if self.forward_mode:
            return None
        data = [None, None]
        for index, arr in enumerate((self.weights, self.bias)):
            if arr:
                arr.map_read()
                data[index] = arr.mem
        return data

    def generate_data_for_master(self):
        return None

    def apply_data_from_master(self, data):
        """Loads weights (data[0]) and bias (data[1]) sent by the master.

        Existing arrays are overwritten in place; empty ones are reset
        with the received data. Does nothing in forward mode.
        """
        if self.forward_mode:
            return
        for index, arr in enumerate((self.weights, self.bias)):
            if arr:
                # The whole buffer is overwritten.
                arr.map_invalidate()
                numpy.copyto(arr.mem, data[index])
            else:
                arr.reset(data[index])

    def apply_data_from_slave(self, data, slave):
        pass

    def drop_slave(self, slave):
        pass
示例10: Binarization
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class Binarization(AcceleratedUnit, EmptyDeviceMethodsMixin):
    """
    Input Binarization. Input and output is 2d arrays of the same size.
    Each element A(i,j) (in row i and column j) of input is a float
    number between 0 and 1. Each element B(i,j) of output is equal 1 with
    probability A(i,j) and 0 with 1 - A(i,j).

    Must be assigned before initialize():
    * input

    Updates after run():
    * output

    Creates within initialize():
    * output

    Attributes:
        input: input as batch of samples.
        output: output as batch of samples.
    """

    def __init__(self, workflow, **kwargs):
        super(Binarization, self).__init__(workflow, **kwargs)
        self.output = Array()
        self.rand = kwargs.get("rand", prng.get())
        self.demand("input", "batch_size")

    def run(self):
        """Batch binarization on CPU only.

        The whole input is copied to the output first, then the first
        ``batch_size`` rows are replaced by their binarized version.
        """
        self.output.map_invalidate()
        self.input.map_read()
        self.output.mem[:] = self.input.mem[:]
        self.output.mem[:self.batch_size, :] = self.matlab_binornd(
            1, self.input.mem[:self.batch_size, :])

    def initialize(self, device, **kwargs):
        """Allocates ``output`` when it is empty or differs in size."""
        super(Binarization, self).initialize(device=device, **kwargs)
        if not self.output or self.output.size != self.input.size:
            self.output.reset()
            self.output.mem = numpy.zeros_like(self.input.mem)
        self.output.initialize(self.device)

    def matlab_binornd(self, n, p_in):
        """
        Analogue binornd in Matlab, but n must be scalar.

        The function generates a matrix of random variables,
        where the element at (i,j) position is generated from binomial
        distribution with the number of trials n and the probability of
        success p_in(i,j).

        Args:
            n (int): number of trials
            p_in (1 or 2 dimension numpy.array): success probabilities

        Returns:
            res (numpy.array of the same shape as p_in): random variables
            generated from the binomial distribution

        Raises:
            ValueError: p_in is neither 1- nor 2-dimensional.
        """
        p = numpy.copy(p_in)
        ndim = len(p.shape)
        if ndim == 2:
            nrow, ncol = p.shape
            # Flatten column by column.
            p = numpy.transpose(p).flatten()
        elif ndim != 1:
            raise ValueError("shape of input Binarization class "
                             "must be 1 or 2 dimensions")
        # dim must be taken BEFORE tiling: it is the number of
        # probabilities, each of which needs its own n independent draws.
        dim = p.shape[0]
        p = matlib.repmat(p, n, 1)
        f = self.rand.rand(n, dim)
        # Successes per element = number of the n draws that fall below p.
        res = numpy.sum(f < p, axis=0)
        if ndim == 2:
            res = numpy.transpose(res.reshape(ncol, nrow)).reshape(nrow, ncol)
        return res
示例11: InputJoiner
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
#.........这里部分代码省略.........
if value is None:
self.num_inputs = 0
return
if not hasattr(value, "__iter__"):
raise TypeError("inputs must be iterable")
self.num_inputs = len(value)
for i, inp in enumerate(value):
setattr(self, "input_%d" % i, inp)
def link_inputs(self, other, *args):
    """Adds more inputs and links them.

    The new inputs are appended after the existing ones, i.e. they are
    linked to the attributes "input_<num_inputs>", "input_<num_inputs+1>",
    and so on.

    Parameters:
        other: unit from which to link attributes.
        args: attribute names to link.

    Raises:
        ValueError: no attribute names were given.
    """
    if not len(args):
        raise ValueError("args may not be empty")
    first_index = self.num_inputs
    self.num_inputs = first_index + len(args)
    for index, arg in enumerate(args, first_index):
        self.link_attrs(other, ("input_%d" % index, arg))
def _init_offset_length_attributes(self):
"""Initializes offset_0, offset_1, ...
length_0, length_1, ...
"""
offset = 0
for i in range(self.num_inputs):
inp = getattr(self, "input_%d" % i)
setattr(self, "offset_%d" % i, offset)
setattr(self, "length_%d" % i, inp.sample_size)
offset += inp.sample_size
def initialize(self, device, **kwargs):
    """Allocates the joined output for the current set of inputs.

    Returns:
        True when at least one input has no memory yet (not ready to
        initialize); None otherwise.
    """
    for inp in self.inputs:
        if inp.mem is None:
            # Not yet ready to initialize
            return True

    self._init_offset_length_attributes()
    super(InputJoiner, self).initialize(device=device, **kwargs)

    # The output batch size is the smallest one among the inputs.
    minibatch_size = min(i.shape[0] for i in self.inputs)
    if any(i.shape[0] > minibatch_size for i in self.inputs):
        self.warning("Detected inputs of different sizes. Sizes will be "
                     "cut to the lowest value (%d)", minibatch_size)

    joined_width = sum(i.size // i.shape[0] for i in self.inputs)
    output_shape = (minibatch_size, joined_width)
    if self.output:
        assert self.output.shape == output_shape
    else:
        self.output.reset(numpy.zeros(output_shape, self.inputs[0].dtype))

    self.init_vectors(self.output, *self.inputs)
def _gpu_init(self):
    """Builds the program and binds the "join" kernel arguments.

    The program name encodes the class name and the output shape.
    """
    output_shape = self.output.shape
    program_name = "%s_%d_%s" % (
        type(self).__name__, output_shape[0],
        "_".join(map(str, output_shape[1:])))
    defines = {
        'etype': opencl_types.numpy_dtype_to_opencl(self.output.dtype),
    }
    self.build_program(defines, program_name, inputs=self.inputs)
    self.assign_kernel("join")
    self.set_args(self.output, *self.inputs)
def ocl_init(self):
    """OpenCL setup; identical to the shared GPU initialization."""
    self._gpu_init()
def cuda_init(self):
    """CUDA setup; identical to the shared GPU initialization."""
    self._gpu_init()
def numpy_run(self):
    """Joins the inputs side by side into ``output`` on the CPU.

    Every input contributes ``size // shape[0]`` columns (its flattened
    sample), taken from its first ``output.shape[0]`` rows. Joining stops
    at the first input that would contribute no columns.
    """
    self.output.map_invalidate()  # we will update output on CPU
    minibatch_size = self.output.shape[0]
    low = 0
    for inp in self.inputs:
        inp.map_read()
        high = low + inp.size // inp.shape[0]
        if low >= high:
            break
        # Flatten every sample so that inputs with any number of
        # dimensions fit the 2D column window; 2D inputs are unchanged.
        self.output.mem[:, low:high] = numpy.reshape(
            inp[:minibatch_size], (-1, high - low))
        low = high
def ocl_run(self):
    """Joins the inputs on the device with the OpenCL kernel.

    Global and local sizes are both ``(output.shape[0],)``.
    """
    self.output.unmap()  # we will update output on GPU
    for inp in self.inputs:
        inp.unmap()
    size = (self.output.shape[0],)
    self.execute_kernel(size, size)
def cuda_run(self):
    """Joins the inputs on the device with the CUDA kernel.

    A single block of ``output.shape[0]`` threads is launched.
    """
    self.output.unmap()  # we will update output on GPU
    for inp in self.inputs:
        inp.unmap()
    # TODO(a.kazantsev): rewrite CUDA kernel for proper grid size
    self.execute_kernel((1, 1, 1), (self.output.shape[0], 1, 1))
示例12: GDMultiplier
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class GDMultiplier(AcceleratedUnit):
    """Gradient descent for Multiplier.

    Computes ``err_x = err_output * y`` and ``err_y = err_output * x``.
    """

    def __init__(self, workflow, **kwargs):
        super(GDMultiplier, self).__init__(workflow, **kwargs)
        self.err_x = Array()
        self.err_y = Array()
        self.demand("x", "y", "err_output")

    def initialize(self, device, **kwargs):
        """Allocates err_x / err_y (shaped like x / y) when they are empty."""
        super(GDMultiplier, self).initialize(device, **kwargs)
        for err, operand in ((self.err_x, self.x), (self.err_y, self.y)):
            if err:
                assert err.shape == operand.shape
            else:
                err.reset(numpy.zeros_like(operand.mem))
        self.init_vectors(self.err_x, self.err_y,
                          self.x, self.y, self.err_output)

    def init_unpickled(self):
        super(GDMultiplier, self).init_unpickled()
        self.sources_["multiplier"] = {}

    def _gpu_init(self):
        """Builds the program and binds the "multiply_backward" kernel."""
        defines = {"OUTPUT_SIZE": self.err_output.size}
        program_name = "%s_%d" % (
            self.__class__.__name__, self.err_output.size)
        self.build_program(defines, program_name, dtype=self.x.dtype)
        self.assign_kernel("multiply_backward")
        self.set_args(self.x, self.y, self.err_output, self.err_x, self.err_y)

    def cuda_init(self):
        self._gpu_init()
        block_size = self.device.suggest_block_size(self._kernel_)
        n_blocks = int(numpy.ceil(self.err_output.size / block_size))
        self._global_size = (n_blocks, 1, 1)
        self._local_size = (block_size, 1, 1)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = (self.err_output.size, 1, 1)
        self._local_size = None

    def numpy_init(self):
        pass  # nothing to init

    def _gpu_run(self):
        self.unmap_vectors(self.x, self.y, self.err_output,
                           self.err_x, self.err_y)
        self.execute_kernel(self._global_size, self._local_size)

    def cuda_run(self):
        self._gpu_run()

    def ocl_run(self):
        self._gpu_run()

    def numpy_run(self):
        """Computes both gradients on the CPU, writing in place."""
        self.x.map_read()
        self.y.map_read()
        self.err_output.map_read()
        self.err_x.map_invalidate()
        self.err_y.map_invalidate()
        err = self.err_output.mem
        numpy.multiply(err, self.y.mem, self.err_x.mem)
        numpy.multiply(err, self.x.mem, self.err_y.mem)
示例13: KohonenForward
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
#.........这里部分代码省略.........
if chunk_size < 2:
chunk_size = self.neurons_number // 2 + 1
self.argmin_group_size = \
int(numpy.ceil(self.neurons_number / chunk_size))
block_size, vector_opt = self.device.device_info.get_kernel_bs_vo(
kernel="matrix_multiplication", dtype=self.input.dtype)
defines = {
'BLOCK_SIZE': block_size,
'VECTOR_OPT': int(bool(vector_opt)),
'BATCH': batch_size,
'SAMPLE_LENGTH': self.sample_length,
'NEURONS_NUMBER': self.neurons_number,
'CHUNK_SIZE': chunk_size,
'COPY_CHUNK_SIZE': copy_chunk_size,
}
if self.weights_transposed:
defines['WEIGHTS_TRANSPOSED'] = 1
self.build_program(defines, "%s_%d_%d_%d" %
(self.__class__.__name__,
batch_size, self.sample_length,
self.neurons_number),
dtype=self.weights.mem.dtype)
if self.total is not None:
self._set_total_global_size_ = \
[int(numpy.ceil(batch_size / copy_chunk_size))]
self._krn_set_total_ = self.get_kernel("set_total")
self._krn_set_total_.set_args(self.output.devmem, cl.skip,
self.total.devmem)
if self.argmins is not None:
return
self._krn_distances_ = self.get_kernel("calculate_distances")
self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
self._distances.devmem)
self._krn_argmin_ = self.get_kernel("calculate_argmin")
self._krn_argmin_.set_args(self._distances.devmem, self.output.devmem,
None)
self._gs_distance = [
roundup(self.neurons_number, block_size),
roundup(batch_size, block_size)]
self._ls_distance = [block_size, block_size]
def ocl_run(self):
    """Computes the winners on the device and optionally updates ``total``.

    Without ``argmins`` the distance and argmin kernels are executed;
    otherwise ``argmins`` is copied into ``output`` on the host. When
    ``total`` is present, the set_total kernel runs with the offset of
    the current minibatch as argument 1.
    """
    has_total = self.total is not None
    self.output.unmap()
    if has_total:
        self.total.unmap()

    if self.argmins is not None:
        self.argmins.unmap()
        self.argmins.map_read()
        self.output.map_write()
        self.output.mem[:] = self.argmins.mem
        self.output.unmap()
        self.argmins.unmap()
    else:
        self.input.unmap()
        self.weights.unmap()
        self.execute_kernel(self._gs_distance, self._ls_distance,
                            self._krn_distances_)
        group = [self.argmin_group_size]
        self.execute_kernel(group, [self.argmin_group_size],
                            self._krn_argmin_)

    if has_total:
        self._minibatch_offset_[0] = \
            self.minibatch_offset - self.minibatch_size
        self._krn_set_total_.set_arg(1, self._minibatch_offset_)
        self.execute_kernel(self._set_total_global_size_, None,
                            self._krn_set_total_)
def numpy_run(self):
    """Computes the winners on the CPU and optionally updates ``total``.

    Winners are taken from ``argmins`` when it is set; otherwise each
    one is the argmin of numpy_linalg_norm(weights - sample). With
    ``total`` set, only ``minibatch_size`` samples are processed and each
    winner is stored at its minibatch position in ``total``.
    """
    use_argmins = self.argmins is not None
    has_total = self.total is not None

    self.output.map_invalidate()
    if use_argmins:
        self.argmins.map_read()
        self.output.mem[:] = self.argmins.mem
    else:
        self.input.map_read()
        self.weights.map_read()
    if has_total:
        self.total.map_invalidate()

    if has_total:
        length = self.minibatch_size
    else:
        length = self.input.mem.shape[0]

    for sindex in range(length):
        if use_argmins:
            winner = self.argmins[sindex]
        else:
            dist = self.weights.mem - self.input[sindex]
            winner = numpy.argmin(self.numpy_linalg_norm(dist))
            self.output[sindex] = winner
        if has_total:
            index = sindex + self.minibatch_offset - self.minibatch_size
            self.total[index] = winner
示例14: KohonenTrainer
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
#.........这里部分代码省略.........
batch_size, self._sample_length,
self._neurons_number),
dtype=self.weights.mem.dtype)
self.ocl_consts_ = numpy.zeros(1, dtype=self.weights.mem.dtype)
self._krn_distances_ = self.get_kernel("calculate_distances")
self._krn_distances_.set_args(self.input.devmem, self.weights.devmem,
self._distances.devmem)
self._krn_argmin_ = self.get_kernel("calculate_argmin")
self._krn_argmin_.set_args(self._distances.devmem, self.argmins.devmem,
self.winners.devmem)
self._krn_gravity_ = self.get_kernel("compute_gravity")
self._krn_gravity_.set_args(self.argmins.devmem, self._coords.devmem)
self._krn_gravity_.set_arg(3, self._distances.devmem)
self._krn_apply_gradient_ = self.get_kernel("apply_gradient")
self._krn_apply_gradient_.set_args(self.input.devmem,
self._distances.devmem)
self._krn_apply_gradient_.set_arg(3, self.weights.devmem)
self._gs_distance = [
roundup(self._neurons_number, block_size),
roundup(batch_size, block_size)]
self._ls_distance = [block_size, block_size]
def iteration(fn):
    """Decorate a unit method so that each call advances ``self.time``.

    The wrapper forwards every argument to ``fn``, adds one to
    ``self.time`` after ``fn`` has returned and passes ``fn``'s result
    through. It is named after ``fn`` with an ``"_iteration"`` suffix; for
    callables that lack ``__name__`` the name of their ``func`` attribute
    is used (e.g. ``functools.partial`` objects), and failing that the
    wrapper's own name.
    """
    def wrapped(self, *args, **kwargs):
        outcome = fn(self, *args, **kwargs)
        # Reached only when fn did not raise, so failed steps don't count.
        self.time += 1
        return outcome

    # The fallback is resolved up front, whether or not it ends up used.
    name_donor = getattr(fn, 'func', wrapped)
    base_name = getattr(fn, '__name__', name_donor.__name__)
    wrapped.__name__ = base_name + '_iteration'
    return wrapped
@iteration
def numpy_run(self):
    """Run one Kohonen training step over the current batch on the host.

    For every input sample the winner is the neuron minimising
    ``self.numpy_linalg_norm(weights - sample)``; its counter in
    ``self.winners`` is incremented. Every neuron is then attracted to the
    sample with strength ``exp(-d / (2 * sigma ** 2))``, where ``d`` is the
    squared distance between the neuron's and the winner's entries of
    ``self._coords`` and ``sigma`` is ``self.gravity_radius``. The
    attractions, scaled by ``self.gradient_multiplier``, are summed over
    the whole batch and added to the weights once, after the loop.
    """
    batch_size = self.input.mem.shape[0]
    neurons_number = self._neurons_number
    dists = numpy.empty(neurons_number)
    gradients = numpy.zeros(self.weights.mem.shape)
    sigma = self.gravity_radius
    gmult = self.gradient_multiplier

    self.input.map_read()
    # ocl_run unmaps _coords, so make it readable on the host again.
    self._coords.map_read()
    # weights and winners are read before being updated in place, so their
    # current contents must be kept: map for writing rather than
    # invalidating (assuming map_invalidate lets the backend drop the
    # current contents, as its name suggests).
    self.weights.map_write()
    self.winners.map_write()

    coords = self._coords.mem
    for sindex in range(batch_size):
        sample = self.input[sindex]
        dist = self.weights.mem - sample
        winner = numpy.argmin(self.numpy_linalg_norm(dist))
        self.winners[winner] += 1
        winner_coords = coords[winner]
        for nindex in range(neurons_number):
            dist = coords[nindex] - winner_coords
            dists[nindex] = numpy.sum(dist * dist)
        gravity = numpy.exp(dists / (-2 * sigma * sigma))
        # Column vector: one attraction factor per neuron (weights row).
        gradients += gravity.reshape((neurons_number, 1)) * \
            (sample - self.weights.mem) * gmult
    self.weights.mem += gradients
@iteration
def ocl_run(self):
    """Run one training step through the device kernels.

    All involved buffers are unmapped first. Then four kernels are
    executed in order: ``self._krn_distances_``, ``self._krn_argmin_``,
    ``self._krn_gravity_`` and ``self._krn_apply_gradient_``. The gravity
    radius and the gradient multiplier reach their kernels as argument 2,
    passed through the one-element ``self.ocl_consts_`` array.
    """
    self.unmap_vectors(self.input, self.weights, self.winners,
                       self._distances, self.argmins, self._coords)

    batch_size = self.input.mem.shape[0]
    consts = self.ocl_consts_

    # 1. Distances kernel.
    self.execute_kernel(
        self._gs_distance, self._ls_distance, self._krn_distances_)

    # 2. Argmin kernel; global and local sizes are the same value.
    argmin_global = [self.argmin_group_size]
    argmin_local = [self.argmin_group_size]
    self.execute_kernel(argmin_global, argmin_local, self._krn_argmin_)

    # 3. Gravity kernel, launched over (batch, neurons).
    consts[0] = self.gravity_radius
    self._krn_gravity_.set_arg(2, consts[0:1])
    gravity_global = [batch_size, self._neurons_number]
    self.execute_kernel(gravity_global, None, self._krn_gravity_)

    # 4. Gradient-application kernel.
    consts[0] = self.gradient_multiplier
    self._krn_apply_gradient_.set_arg(2, consts[0:1])
    group_size = self.device.max_group_size
    gradient_global = [
        int(numpy.ceil(self._sample_length / group_size)), group_size]
    self.execute_kernel(gradient_global, None, self._krn_apply_gradient_)
# ``iteration`` is applied as a plain function to decorate the methods
# above; only after that is it rebound as a static method.
iteration = staticmethod(iteration)
def _get_weights_magnitude(self):
    """
    Returns: magnitude of the weights for the initial random
        distribution, picked so that the activation function is close
        to its maximum when every input value sits at its supposed
        maximum. It does not matter for classic Kohonen networks;
        the values are the same as in All2AllTanh.
    """
    scale = self.input.max_supposed * self._sample_length
    complex_dtypes = (numpy.complex64, numpy.complex128)
    # Complex inputs use a numerator of 1, everything else uses 9.
    numerator = 1.0 if self.input.mem.dtype in complex_dtypes else 9.0
    return numerator / scale
示例15: ZeroFiller
# 需要导入模块: from veles.memory import Array [as 别名]
# 或者: from veles.memory.Array import map_invalidate [as 别名]
class ZeroFiller(ForwardBase, TriviallyDistributable):
    """Fills weights of given unit with zero on every step.

    On each run ``weights`` is multiplied elementwise by a fixed mask.
    Viewing the weights as a ``(kernels, channels)`` matrix (see
    ``effective_shape``), ``mask[k, c]`` is 0 where
    ``k % grouping == c % grouping`` and 1 everywhere else.

    Keyword arguments:
        grouping: integer >= 2. The fallback value 1 is rejected by the
            ``grouping`` setter, so the keyword is effectively required.

    Demands:
        weights: the array to be masked in place.
    """

    MAPPING = {"zero_filter"}

    def __init__(self, workflow, **kwargs):
        super(ZeroFiller, self).__init__(workflow, **kwargs)
        self.mask = Array()
        self.grouping = kwargs.get("grouping", 1)
        self.demand("weights")

    def init_unpickled(self):
        super(ZeroFiller, self).init_unpickled()
        self.sources_["weights_zerofilling"] = {}

    @property
    def effective_shape(self):
        """Shape of ``weights`` flattened to 2D: (shape[0], the rest)."""
        return (self.weights.shape[0],
                self.weights.size // self.weights.shape[0])

    @property
    def grouping(self):
        """Modulus used when building the mask; an integer >= 2."""
        return self._grouping

    @grouping.setter
    def grouping(self, value):
        if not isinstance(value, int):
            raise TypeError(
                "grouping value must be an integer (got %s)" % type(value))
        if value < 2:
            raise ValueError("grouping value %d is invalid" % value)
        self._grouping = value

    def initialize(self, device=None, **kwargs):
        """Build the mask (unless already present) and set up both arrays.

        Returns True without doing anything else while ``weights`` is
        still empty (presumably asking the framework to retry later --
        confirm against the base class contract).

        Raises:
            ValueError: if the number of mask columns is not a multiple
                of ``grouping``.
        """
        super(ZeroFiller, self).initialize(device, **kwargs)
        if not self.weights:
            return True

        if not self.mask:
            rows, cols = self.effective_shape
            if cols % self.grouping != 0:
                raise ValueError(
                    "Non-multiple of grouping weights shape detected: "
                    "%s, grouping=%d" %
                    (self.weights.shape, self.grouping))
            self.mask.reset(numpy.zeros((rows, cols),
                                        dtype=self.weights.dtype))
            self.mask.map_invalidate()
            # TODO(a.kazantsev): add check for transposed weights.
            # One broadcast comparison instead of a Python loop over every
            # (kernel, channel) pair; the resulting values are the same.
            kernel_residue = \
                numpy.arange(rows).reshape((rows, 1)) % self.grouping
            chan_residue = \
                numpy.arange(cols).reshape((1, cols)) % self.grouping
            self.mask.mem[:] = kernel_residue != chan_residue
        else:
            assert self.mask.shape == self.effective_shape

        for vec in self.mask, self.weights:
            vec.initialize(device)

    def _gpu_init(self):
        """Build the program and bind (mask, weights) to the kernel."""
        self.build_program(cache_file_name="zero_filling_%d" % self.grouping,
                           dtype=self.weights.dtype)
        self.assign_kernel("multiply_by_mask")
        self.set_args(self.mask, self.weights)

    def ocl_init(self):
        self._gpu_init()
        self._global_size = [self.weights.size]
        self._local_size = None

    def cuda_init(self):
        self._gpu_init()
        # NOTE(review): presumably a grid of weights.size blocks holding a
        # single thread each -- confirm against the kernel source.
        self._global_size = (self.weights.size, 1, 1)
        self._local_size = (1, 1, 1)

    def numpy_run(self):
        """Apply the mask on the host."""
        self.mask.map_read()
        # weights are modified in place, hence map_write.
        self.weights.map_write()
        self.weights.mem *= self.mask.mem

    def _gpu_run(self):
        """Apply the mask on the device."""
        self.weights.unmap()
        self.mask.unmap()
        self.execute_kernel(self._global_size, self._local_size)

    def ocl_run(self):
        self._gpu_run()

    def cuda_run(self):
        self._gpu_run()