本文整理汇总了Python中pyopencl.mem_flags的典型用法代码示例。如果您正苦于以下问题:Python pyopencl.mem_flags的具体用法?Python pyopencl.mem_flags怎么用?Python pyopencl.mem_flags使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解其所在模块pyopencl的用法示例。
在下文中一共展示了pyopencl.mem_flags的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_internal_buffer
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def create_internal_buffer(self, ctx):
    """Prepare the host arrays and the device buffer used by the kernels.

    The first two fields of every ``self.city_info`` entry (presumably the
    x/y coordinates -- confirm against the caller) are packed into a float32
    array and uploaded into a read/write device buffer. The annealing
    parameters ``iterations``, ``temperature``, ``terminate_temperature`` and
    ``alpha`` are converted to fixed-width numpy scalars.

    :param ctx: OpenCL context in which the device buffer is created.
    """
    # Index-based access is kept on purpose: it works whether city_info is a
    # sequence or a mapping keyed by 0..n-1.
    cityxy = [(self.city_info[idx][0], self.city_info[idx][1])
              for idx in range(len(self.city_info))]
    self.__np_cityxy = numpy.array(cityxy, dtype=numpy.float32)

    mf = cl.mem_flags
    # A single allocation is sufficient: COPY_HOST_PTR copies the host array
    # into the buffer at creation time.
    self.__dev_cityxy = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                  hostbuf=self.__np_cityxy)

    # Fixed-width scalars so the values have a well-defined size when they
    # are handed to a kernel.
    self.__np_iterations = numpy.int32(self.iterations)
    self.__np_temperature = numpy.float32(self.temperature)
    self.__np_terminate_temperature = numpy.float32(self.terminate_temperature)
    self.__np_alpha = numpy.float32(self.alpha)
示例2: __init_cl_member
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __init_cl_member(self):
    """Create the device buffers for the random seeds, costs and solutions.

    The cost and solution host arrays are obtained from ``self.sas``; one
    host-generated random 32-bit value per solution is uploaded alongside
    them. Finally ``self.sas`` is asked to create its own internal buffers
    in the same context.
    """
    self.__np_costs = self.sas.get_cost_buffer()
    solution_count, self.__np_solution = self.sas.get_solution_info()

    rw_copy = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR

    # The random values are produced on the host and shipped to the device,
    # one unsigned 32-bit value per solution.
    seeds = numpy.array(
        [random.randint(0, 4294967295) for _ in range(solution_count)],
        dtype=numpy.uint32)

    self.__dev_rnum = cl.Buffer(self.__ctx, rw_copy, hostbuf=seeds)
    self.__dev_costs = cl.Buffer(self.__ctx, rw_copy, hostbuf=self.__np_costs)
    self.__dev_solution = cl.Buffer(self.__ctx, rw_copy,
                                    hostbuf=self.__np_solution)

    self.sas.create_internal_buffer(self.__ctx)
## To save the annealing state
示例3: __prepare_fitness_args
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __prepare_fitness_args(self):
    """Assemble the list of device buffers handed to the fitness kernel.

    The list always starts with the chromosome and fitness buffers. When
    ``self.__fitness_args`` is not None, each entry ``arg`` contributes one
    extra buffer built from ``arg['v']`` using the numpy dtype returned by
    ``self.__type_to_numpy_type(arg['t'])``.
    """
    rw_copy = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR

    self.__fitness_args_list = [self.__dev_chromosomes, self.__dev_fitnesses]
    self.__extra_fitness_args_list = []

    if self.__fitness_args is not None:
        # One device buffer per user-supplied fitness argument.
        self.__extra_fitness_args_list.extend(
            cl.Buffer(self.__ctx,
                      rw_copy,
                      hostbuf=numpy.array(arg['v'],
                                          dtype=self.__type_to_numpy_type(arg['t'])))
            for arg in self.__fitness_args)

    # Mandatory buffers first, user-supplied ones after.
    self.__fitness_args_list = self.__fitness_args_list + self.__extra_fitness_args_list
示例4: __init__
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __init__(self, device=0, platform=0, iterations=7, compiler_options=None):
    """Create an OpenCL context and queue and record device properties.

    :param device: Index of the device within the selected platform.
    :type device: int
    :param platform: Index of the platform in ``cl.get_platforms()``.
    :type platform: int
    :param iterations: Number of iterations to run the kernel during
        benchmarking, 7 by default. Stored as ``self.iterations``.
    :type iterations: int
    :param compiler_options: Compiler options; stored as
        ``self.compiler_options`` with ``None`` replaced by an empty list.
    :raises ImportError: If the ``cl`` module object is falsy.
    """
    if not cl:
        raise ImportError("Error: pyopencl not installed, please install e.g. using 'pip install pyopencl'.")

    self.iterations = iterations

    # Context and profiling-enabled queue on the requested device.
    selected_device = cl.get_platforms()[platform].get_devices()[device]
    self.ctx = cl.Context(devices=[selected_device])
    self.queue = cl.CommandQueue(
        self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    self.mf = cl.mem_flags

    # Device properties.
    dev = self.ctx.devices[0]
    self.max_threads = dev.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)
    self.compiler_options = compiler_options or []

    # Environment information; note that the raw compiler_options argument
    # (possibly None) is recorded here, not the normalised list.
    self.env = {
        "platform_name": dev.platform.name,
        "platform_version": dev.platform.version,
        "device_name": dev.name,
        "device_version": dev.version,
        "opencl_c_version": dev.opencl_c_version,
        "driver_version": dev.driver_version,
        "iterations": self.iterations,
        "compiler_options": compiler_options,
    }
    self.name = dev.name
示例5: create_bytearray
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def create_bytearray(ctx, size):
    """Return a zero-filled int32 host array and a device buffer copied from it.

    :param ctx: OpenCL context in which the buffer is created.
    :param size: Shape argument forwarded to ``numpy.zeros``.
    :returns: ``(host_array, device_buffer)`` tuple.
    """
    rw_copy = cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR
    host_array = numpy.zeros(size, dtype=numpy.int32)
    device_buffer = cl.Buffer(ctx, rw_copy, hostbuf=host_array)
    return host_array, device_buffer
示例6: preexecute_kernels
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def preexecute_kernels(self, ctx, queue, population):
    """Allocate the device buffers needed before the kernels run.

    :param ctx: OpenCL context in which the buffers are created.
    :param queue: Command queue (not used by this method).
    :param population: Number of individuals; sizes all three buffers.
    """
    gene_count = population * self.dna_total_length

    # Zero-filled host arrays that seed (or size) the device buffers.
    host_other = numpy.zeros(gene_count, dtype=numpy.int32)
    host_cross_map = numpy.zeros(gene_count, dtype=numpy.int32)
    host_ratios = numpy.zeros(population, dtype=numpy.float32)

    flags = cl.mem_flags
    rw_copy = flags.READ_WRITE | flags.COPY_HOST_PTR

    # The ratios buffer is only sized from the host array; nothing is copied.
    self.__dev_ratios = cl.Buffer(ctx, flags.WRITE_ONLY, host_ratios.nbytes)
    self.__dev_other_chromosomes = cl.Buffer(ctx, rw_copy, hostbuf=host_other)
    self.__dev_cross_map = cl.Buffer(ctx, rw_copy, hostbuf=host_cross_map)
示例7: __restore_state
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __restore_state(self, data):
    """Restore the algorithm state from a saved ``data`` mapping.

    Reads the keys ``prob_mutation``, ``prob_crossover``, ``generation_idx``,
    ``statistics``, ``generation_time_diff``, ``population``, ``rnum``,
    ``fitnesses``, ``chromosomes``, ``best``, ``worst`` and ``avg``, rebuilds
    the device buffers from the restored host data, lets the sample
    chromosome restore itself and finally marks the instance as paused.

    :param data: Mapping holding the saved state.
    """
    # Algorithm parameters and bookkeeping.
    self.__prob_mutation = data['prob_mutation']
    self.__prob_crossover = data['prob_crossover']
    self.__generation_index = data['generation_idx']
    self.__dictStatistics = data['statistics']
    self.__generation_time_diff = data['generation_time_diff']
    self.__population = data['population']
    saved_rnum = data['rnum']
    self.__fitnesses = data['fitnesses']
    self.__np_chromosomes = data['chromosomes']
    self.__best_fitnesses = data['best']
    self.__worst_fitnesses = data['worst']
    self.__avg = data['avg']

    # Recreate the device memory from the restored host memory.
    flags = cl.mem_flags
    rw_copy = flags.READ_WRITE | flags.COPY_HOST_PTR
    self.__dev_rnum = cl.Buffer(
        self.__ctx, rw_copy,
        hostbuf=numpy.array(saved_rnum, dtype=numpy.uint32))
    self.__dev_chromosomes = cl.Buffer(
        self.__ctx, rw_copy, hostbuf=self.__np_chromosomes)
    self.__dev_fitnesses = cl.Buffer(
        self.__ctx, flags.WRITE_ONLY | flags.COPY_HOST_PTR,
        hostbuf=self.__fitnesses)

    self.__prepare_fitness_args()
    self.__sample_chromosome.restore(data, self.__ctx, self.__queue,
                                     self.__population)
    self._paused = True
# public methods
示例8: restore
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def restore(self, data, ctx, queue, population):
    """Rebuild the device buffers from saved host data.

    :param data: Mapping providing ``other_chromosomes`` and ``ratios``.
    :param ctx: OpenCL context in which the buffers are created.
    :param queue: Command queue used for the host-to-device copies.
    :param population: Not used by this method.
    """
    saved_other = data['other_chromosomes']
    saved_ratios = data['ratios']

    flags = cl.mem_flags
    self.__dev_ratios = cl.Buffer(ctx, flags.WRITE_ONLY, saved_ratios.nbytes)
    self.__dev_other_chromosomes = cl.Buffer(
        ctx, flags.READ_WRITE | flags.COPY_HOST_PTR, hostbuf=saved_other)

    # Upload the saved values. The ratios buffer was only sized above, so it
    # needs this copy; other_chromosomes was already filled by COPY_HOST_PTR
    # and the explicit copy is retained to keep behaviour unchanged.
    cl.enqueue_copy(queue, self.__dev_ratios, saved_ratios)
    cl.enqueue_copy(queue, self.__dev_other_chromosomes, saved_other)
示例9: krak
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def krak(bnum,bdata,cdata):
    """Build the work list for the ``krak`` OpenCL kernel, run it and report.

    For every table in ``mytables``, every 64-character window of ``bdata``
    and every color 0..7, one record of uint64 words is appended:

    * ``cdata`` falsy: window value, the 8 ``tables.rft`` entries of the
      table, color, 0x7, 0x0.
    * ``cdata`` truthy and ``cdata[ctr] != 0``: ``revbits(cdata[ctr])``, the
      8 table entries, 0x0, the packed color/position/burst word, and the
      window value as target.

    The list is uploaded, ``krak.cl`` is built and launched with one
    work-item per word, the buffer is copied back into the host array, the
    elapsed time is printed and ``report`` is called with the result.

    :param bnum: Burst number; converted with ``int()`` and packed into bits
        12 and above of the color word.
    :param bdata: String of binary digits (each window is parsed base 2).
    :param cdata: Falsy for the first pass; otherwise indexed by a running
        counter that starts at 1 in this variant.
    """
    t = []
    # NOTE(review): indentation was lost in the source. The counter is
    # assumed to advance once per (table, pos, color) sample -- confirm.
    ctr = 1
    for table in mytables:
        for pos in range(0, len(bdata) - 63):
            sample = bdata[pos:pos + 64]
            for color in range(0, 8):
                if not cdata:
                    t.append(int("0b%s" % sample, 2))  # pivot
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(color)
                    t.append(0x7)
                    t.append(0x0)
                elif cdata[ctr] != 0:  # block was found in table
                    pivot = revbits(cdata[ctr])
                    target = int("0b%s" % sample, 2)
                    t.append(pivot)
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(0x0)
                    # color in bits 0-3, position from bit 4, burst from bit 12
                    scolor = color | (pos << 4) | (int(bnum) << 12)
                    t.append(scolor)
                    t.append(target)
                ctr += 1

    a = np.array(t, dtype=np.uint64)
    # NOTE(review): the buffer is flagged READ_ONLY yet is copied back after
    # the kernel ran -- confirm the kernel does not write to it.
    a_dev = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                      hostbuf=a)
    s = np.uint32(a.shape)

    # compile the kernel
    FILE_NAME = "krak.cl"
    with open(FILE_NAME, "r") as f:
        SRC = f.read()
    prg = cl.Program(ctx, SRC).build()

    x = time.time()
    # launch the kernel
    print("Launching kernel, size %i" % a.shape[0])
    event = prg.krak(queue, a.shape, None, a_dev, s)
    event.wait()

    # copy the output from the context to the Python process
    cl.enqueue_copy(queue, a, a_dev)
    print("lag=%f" % (time.time() - x))
    report(a)
示例10: krak
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def krak(bnum,bdata,cdata):
    """Build the work list for the ``krak`` OpenCL kernel, run it and report.

    For every table in ``mytables``, every 64-character window of ``bdata``
    and every color 0..7, one record of uint64 words is appended:

    * ``cdata`` falsy: window value, the 8 ``tables.rft`` entries of the
      table, color, 0x7, 0x0.
    * ``cdata`` truthy and ``cdata[ctr] != 0``: ``revbits(cdata[ctr])``, the
      8 table entries, 0x0, the packed color/position/burst word, and the
      window value as target.

    The list is uploaded, ``krak.cl`` is built and launched with one
    work-item per word, the buffer is copied back into the host array, the
    elapsed time is printed and ``report`` is called with the result.

    :param bnum: Burst number; converted with ``int()`` and packed into bits
        12 and above of the color word.
    :param bdata: String of binary digits (each window is parsed base 2).
    :param cdata: Falsy for the first pass; otherwise indexed by a running
        counter that starts at 0 in this variant.
    """
    t = []
    # NOTE(review): indentation was lost in the source. The counter is
    # assumed to advance once per (table, pos, color) sample -- confirm.
    ctr = 0
    for table in mytables:
        for pos in range(0, len(bdata) - 63):
            sample = bdata[pos:pos + 64]
            for color in range(0, 8):
                if not cdata:
                    t.append(int("0b%s" % sample, 2))  # pivot
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(color)
                    t.append(0x7)
                    t.append(0x0)
                elif cdata[ctr] != 0:  # block was found in table
                    pivot = revbits(cdata[ctr])
                    target = int("0b%s" % sample, 2)
                    t.append(pivot)
                    t.extend(tables.rft[table - 100][i] for i in range(0, 8))
                    t.append(0x0)
                    # color in bits 0-3, position from bit 4, burst from bit 12
                    t.append(color | (pos << 4) | (int(bnum) << 12))
                    t.append(target)
                ctr += 1

    a = np.array(t, dtype=np.uint64)
    # NOTE(review): the buffer is flagged READ_ONLY yet is copied back after
    # the kernel ran -- confirm the kernel does not write to it.
    a_dev = cl.Buffer(ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                      hostbuf=a)
    s = np.uint32(a.shape)

    # compile the kernel
    FILE_NAME = "krak.cl"
    with open(FILE_NAME, "r") as f:
        SRC = f.read()
    prg = cl.Program(ctx, SRC).build()

    x = time.time()
    # launch the kernel
    print("Launching kernel, size %i" % a.shape[0])
    event = prg.krak(queue, a.shape, None, a_dev, s)
    event.wait()

    # copy the output from the context to the Python process
    cl.enqueue_copy(queue, a, a_dev)
    print("lag=%f" % (time.time() - x))
    report(a)
示例11: compute
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def compute(self, video, num_frame):
    """
    Compute the nearest-neighbour patch indices for one frame of a video.

    input:
        video: numpy array gray image of type input_dtype [num_frames, h, w] or [num_frames, h, w, 1]
        num_frame: frame for which to compute the neighbors; must satisfy
            past_frames <= num_frame < num_frames - future_frames
    output:
        numpy array (video height/step, video width/step, num_neighbors) of indices of nearest neighbors.
        [i, j, :] contains the indices of the top left pixel of patches of size patch_data_width * patch_data_width.
        The matches have been computed for patches of size patch_search_width * patch_search_width.
        If [i, j] is the top left pixel of a given search patch, the returned indices point to the top left
        of the matched data patches with the same patch center (see comments in the code for more details).
        An index converts to (y, x) by doing index / width and index % width.
    """
    assert(video.dtype.type == self.input_dtype)
    assert(len(video.shape) == 3 or (len(video.shape) == 4 and video.shape[3] == 1))
    nf = video.shape[0]
    h = video.shape[1]
    w = video.shape[2]
    assert(num_frame >= self.past_frames and num_frame < (nf-self.future_frames))
    assert(nf*h*w <= 4294967295)  # patch index overflow (stored on uint32). Use a video extract.

    mf = cl.mem_flags
    video_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=video)

    # Allocate the host result first so the device buffer size is derived
    # from it instead of a hard-coded element size.
    dst_pos = np.empty([h // self.step, w // self.step, self.num_neighbors], dtype=np.uint32)
    dst_pos_cl = cl.Buffer(self.ctx, mf.WRITE_ONLY, dst_pos.nbytes)

    global_size = [h//self.step, DIVUP(w, self.wksize - (self.patch_search_width - 1)) * self.wksize]
    local_size = [1, self.wksize]

    self.compute_nn(self.queue, global_size, local_size, dst_pos_cl, video_cl, w, h, w, w*h, 0, 0, num_frame*w*h, 0)
    cl.enqueue_copy(self.queue, dst_pos, dst_pos_cl)

    # At this point, dst_pos[i, j, :] contains the indices (top left of
    # patch_search_width*patch_search_width patches) of the matches for the
    # patch with top left (i, j).
    # When the search and data patch sizes differ, shift every index so that
    # it points to the top left of the patch_data_width*patch_data_width
    # patch centered on the same point as the search patch.
    if (self.patch_search_width != self.patch_data_width):
        diff_xy = (self.patch_search_width - self.patch_data_width) // 2
        index_offset = diff_xy * w + diff_xy
        dst_pos = dst_pos + index_offset
    return dst_pos
示例12: run
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def run(vector):
    """Build and time the ``casting_test`` kernel on random float32 data.

    The kernel source is read from ``casting_vector.c`` (when ``vector`` is
    truthy) or ``casting.c``. A ``GLOBAL_SIZE`` define is prepended; in
    vector mode a generated ``TypedStruct2`` definition is prepended as well
    and the ``%code_generation%`` placeholder is replaced by a generated
    ``switch`` body. The final source, the result array and the profiled
    execution time are printed.

    :param vector: Selects the vector variant (100 floats handled by 25
        work-items) instead of the scalar one (100 work-items).
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

    # The context manager closes the file even if reading fails.
    with open('casting_vector.c' if vector else 'casting.c', 'r') as f:
        fstr = f.read()

    data_size = 100
    global_size = data_size // 4 if vector else data_size

    if vector:
        # One float4 member and one switch case per work-item.
        codeTemp = ' case {0}:\n tsR->data{0} = tsA->data{0} + tsB->data{0};\n break;\n'
        struct = ('typedef struct {\n'
                  + ''.join(' float4 data' + str(i) + ';\n' for i in range(global_size))
                  + '} TypedStruct2;\n')
        code = (' switch(id) {\n'
                + ''.join(codeTemp.format(i) for i in range(global_size))
                + ' }\n')
        fstr = fstr.replace('%code_generation%', code)
        fstr = '#define GLOBAL_SIZE ' + str(global_size) + '\n' + struct + fstr
    else:
        fstr = '#define GLOBAL_SIZE ' + str(global_size) + '\n' + fstr

    print('=' * 50)
    print(fstr)
    print('-' * 50)

    a_np = np.random.rand(data_size).astype(np.float32)
    b_np = np.random.rand(data_size).astype(np.float32)

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

    prg = cl.Program(ctx, fstr).build()
    exec_evt = prg.casting_test(queue, (global_size,), None, a_g, b_g, res_g)
    exec_evt.wait()

    res_np = np.empty_like(a_np)
    cl.enqueue_copy(queue, res_np, res_g).wait()
    print(res_np)

    # Profiling timestamps are scaled by 1e-9 to report seconds.
    elapsed = 1e-9 * (exec_evt.profile.end - exec_evt.profile.start)
    print('Vector: {0} => Execution time of test: {1}s'.format(vector, elapsed))
示例13: __prepare_cl_buffers
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __prepare_cl_buffers(self):
    """Create every host array and device buffer used by the kernels.

    Builds the node-to-node distance and pheromone matrices, the node x/y
    arrays, one random 32-bit value per ant, and the uninitialised per-ant
    work buffers.
    """
    flags = cl.mem_flags
    rw_copy = flags.READ_WRITE | flags.COPY_HOST_PTR
    ro_copy = flags.READ_ONLY | flags.COPY_HOST_PTR
    node_count = self.__node_count
    matrix_shape = [node_count, node_count]

    # Distance matrix, zero-initialised.
    self.__path_distances = numpy.zeros(shape=matrix_shape, dtype=numpy.float32)
    self.__dev_path_distances = cl.Buffer(self.__ctx, rw_copy,
                                          hostbuf=self.__path_distances)

    # Pheromone matrix, every path starts at 1.
    self.__path_pheromones = numpy.ones(shape=matrix_shape, dtype=numpy.float32)
    self.__dev_path_pheromones = cl.Buffer(self.__ctx, rw_copy,
                                           hostbuf=self.__path_pheromones)

    # Node positions: field 0 of each node goes to x, field 1 to y.
    xs = numpy.array([self.__nodes[i][0] for i in range(node_count)],
                     dtype=numpy.float32)
    ys = numpy.array([self.__nodes[i][1] for i in range(node_count)],
                     dtype=numpy.float32)
    self.__dev_x = cl.Buffer(self.__ctx, ro_copy, hostbuf=xs)
    self.__dev_y = cl.Buffer(self.__ctx, ro_copy, hostbuf=ys)

    # Host-generated random values, one unsigned 32-bit value per ant.
    seeds = numpy.array(
        [random.randint(0, 4294967295) for _ in range(self.__ants)],
        dtype=numpy.uint32)
    self.__dev_rnum = cl.Buffer(self.__ctx, rw_copy, hostbuf=seeds)

    # Per-ant, per-node scratch space: 4 bytes for each (ant, node) pair.
    per_ant_node_bytes = 4 * node_count * self.__ants
    # Path taken by each ant.
    self.__dev_visited_nodes = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                         per_ant_node_bytes)
    # Probabilities used when choosing the next node.
    self.__dev_path_probabilities = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                              per_ant_node_bytes)
    # Temporary pheromones used while calculating those probabilities.
    self.__dev_tmp_pheromones = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                          per_ant_node_bytes)
    # Fitness value of each ant for a single round.
    self.__dev_ant_fitnesses = cl.Buffer(self.__ctx, flags.READ_WRITE,
                                         4 * self.__ants)
示例14: __preexecute_kernels
# 需要导入模块: import pyopencl [as 别名]
# 或者: from pyopencl import mem_flags [as 别名]
def __preexecute_kernels(self):
    """Allocate host arrays and device buffers before the first generation.

    Creates the fitness and chromosome storage, one random 32-bit value per
    individual, the fitness-argument buffers, the elitism buffers (only in
    elitism mode) and the statistics buffers, then lets the sample
    chromosome prepare its own buffers and dumps kernel resource usage.
    """
    dna_length = self.__sample_chromosome.dna_total_length
    self.__fitnesses = numpy.zeros(self.__population, dtype=numpy.float32)
    self.__np_chromosomes = numpy.zeros(self.__population * dna_length,
                                        dtype=numpy.int32)

    flags = cl.mem_flags
    rw_copy = flags.READ_WRITE | flags.COPY_HOST_PTR

    # Host-generated random values, one unsigned 32-bit value per individual.
    seeds = numpy.array(
        [random.randint(0, 4294967295) for _ in range(self.__population)],
        dtype=numpy.uint32)
    self.__dev_rnum = cl.Buffer(self.__ctx, rw_copy, hostbuf=seeds)
    self.__dev_chromosomes = cl.Buffer(self.__ctx, rw_copy,
                                       hostbuf=self.__np_chromosomes)
    self.__dev_fitnesses = cl.Buffer(self.__ctx, flags.WRITE_ONLY,
                                     self.__fitnesses.nbytes)
    self.__prepare_fitness_args()

    if self.__is_elitism_mode:
        self.__elites_updated = False
        elite_dna_size = dna_length * self.__elitism_top

        self.__current_elites = numpy.zeros(elite_dna_size, dtype=numpy.int32)
        self.__dev_current_elites = cl.Buffer(self.__ctx, rw_copy,
                                              hostbuf=self.__current_elites)

        self.__updated_elites = numpy.zeros(elite_dna_size, dtype=numpy.int32)
        self.__dev_updated_elites = cl.Buffer(self.__ctx, rw_copy,
                                              hostbuf=self.__updated_elites)

        self.__updated_elite_fitnesses = numpy.zeros(self.__elitism_top,
                                                     dtype=numpy.float32)
        self.__dev_updated_elite_fitnesses = cl.Buffer(
            self.__ctx, rw_copy, hostbuf=self.__updated_elite_fitnesses)

    # NOTE(review): indentation was lost in the source; everything from here
    # on is assumed to run regardless of elitism mode -- confirm.
    # For statistics
    self.__dev_best_indices = cl.Buffer(self.__ctx, rw_copy,
                                        hostbuf=self.__best_indices)
    self.__dev_worst_indices = cl.Buffer(self.__ctx, rw_copy,
                                         hostbuf=self.__worst_indices)

    # The fitness buffer was only sized above; upload the zeroed host values.
    cl.enqueue_copy(self.__queue, self.__dev_fitnesses, self.__fitnesses)

    # Let the sample chromosome prepare its internal data structures.
    self.__sample_chromosome.preexecute_kernels(self.__ctx, self.__queue,
                                                self.__population)

    # Dump information on kernel resource usage.
    self.__dump_kernel_info(self.__prg, self.__ctx, self.__sample_chromosome)
## Populate the first generation.