This page collects typical usage examples of the Python method torch.multiprocessing.Event. If you are unsure what multiprocessing.Event does, how to call it, or what it looks like in practice, the curated code examples below should help. You can also read further about the module that provides it, torch.multiprocessing.
The following lists 10 code examples of multiprocessing.Event, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
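Before the examples, here is a minimal, self-contained sketch (not taken from any of the snippets below) of the basic pattern: a worker process blocks on event.wait() until the parent process calls event.set().

import torch.multiprocessing as mp

def _worker(event, rank):
    event.wait()                      # block until the parent signals
    print("worker", rank, "released")

if __name__ == "__main__":
    ctx = mp.get_context("spawn")     # 'spawn' behaves the same on Windows and Linux
    event = ctx.Event()
    procs = [ctx.Process(target=_worker, args=(event, r)) for r in range(2)]
    for p in procs:
        p.start()
    event.set()                       # release all workers at once
    for p in procs:
        p.join()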
Example 1: __init__

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def __init__(self, fp16=False, mean=(0., 0., 0.), std=(1., 1., 1.), pin_memory=True, **kwargs):
    super().__init__(**kwargs)
    print('Using DALI CPU iterator')
    self.stream = torch.cuda.Stream()

    self.fp16 = fp16
    self.mean = torch.tensor(mean).cuda().view(1, 3, 1, 1)
    self.std = torch.tensor(std).cuda().view(1, 3, 1, 1)
    self.pin_memory = pin_memory

    if self.fp16:
        self.mean = self.mean.half()
        self.std = self.std.half()

    self.proc_next_input = Event()
    self.done_event = Event()
    self.output_queue = queue.Queue(maxsize=5)
    self.preproc_thread = threading.Thread(
        target=_preproc_worker,
        kwargs={'dali_iterator': self._dali_iterator, 'cuda_stream': self.stream, 'fp16': self.fp16,
                'mean': self.mean, 'std': self.std, 'proc_next_input': self.proc_next_input,
                'done_event': self.done_event, 'output_queue': self.output_queue,
                'pin_memory': self.pin_memory})
    self.preproc_thread.daemon = True
    self.preproc_thread.start()

    self.proc_next_input.set()
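The snippet above hands the preprocessing work to a `_preproc_worker` whose body is not shown. The following is a hypothetical sketch of what such a worker could look like: it waits for the `proc_next_input` signal, normalizes one DALI batch on the side CUDA stream, and pushes the result to `output_queue` until `done_event` is set. The DALI output layout (`data[0]["data"]`) is an assumption, not part of the original snippet.

import torch

def _preproc_worker(dali_iterator, cuda_stream, fp16, mean, std,
                    proc_next_input, done_event, output_queue, pin_memory):
    # Hypothetical worker body; the original implementation is not shown on this page.
    while not done_event.is_set():
        proc_next_input.wait()        # block until the consumer requests the next batch
        proc_next_input.clear()       # re-arm the request event
        try:
            data = next(dali_iterator)
        except StopIteration:
            output_queue.put(None)    # signal the end of the epoch
            continue
        with torch.cuda.stream(cuda_stream):
            batch = data[0]["data"].cuda(non_blocking=True)   # layout assumed for illustration
            batch = batch.half() if fp16 else batch.float()
            batch = batch.sub_(mean).div_(std)                # normalize in place
        output_queue.put(batch)       # pin_memory handling omitted in this sketch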
Example 2: run_in_process_group

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining; the background processes
    # need to stay alive while the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3 # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results
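Example 2 delegates to an `init_and_run_process` helper that is defined elsewhere. A plausible sketch, under the assumption that the shared file is used as the rendezvous for torch.distributed (backend choice and error handling are also assumptions), might look like this:

import torch.distributed

def init_and_run_process(rank, world_size, filename, fn, input, q, wait_event):
    # Hypothetical helper; the original implementation is not part of the snippet above.
    torch.distributed.init_process_group(
        backend="gloo",                        # assumed; could also be "nccl" on GPU
        init_method="file://" + filename,
        rank=rank,
        world_size=world_size,
    )
    result = fn(*input)
    q.put(result)
    wait_event.wait()                          # stay alive until the parent has drained the queue
    torch.distributed.destroy_process_group()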
Example 3: get_multiprocess_batch_queue

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # also set so that windows & linux behave the same
    _queue = ctx.Queue(queue_size)
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting " + str(len(files)) + " data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"], conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    for proc_number, file in enumerate(files):
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                              target=target_function,
                              args=(proc_number, conf, _queue, _finish_notification, file, fasttext_vocab_cached_mapping, fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
    return _queue, _processes, _finish_notification

#
# training instance generator
# - filling the _queue with ready to run training batches
# - everything is thread local
#
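A hedged usage sketch of how a caller might consume the queue returned by Example 3: the training loop drains batches from the shared queue and only sets the finish-notification Event once the batches (which live in shared memory) are no longer needed, so the loader processes can exit safely. run_training_step, files, conf, and logger are assumed to exist; the step count is illustrative.

queue, processes, finish_event = get_multiprocess_batch_queue(
    "train", multiprocess_training_loader, files, conf, logger)
try:
    for _ in range(10_000):            # illustrative step count
        batch = queue.get()
        run_training_step(batch)       # assumed training callback
finally:
    finish_event.set()                 # releases the loaders from _wait_for_exit.wait()
    for p in processes:
        p.join()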
Example 4: multiprocess_training_loader

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_training_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spacy tokenizer in a preprocessing step before training
    # (and concat the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data, _config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _triple_loader = IrTripleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                           max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                               sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and no longer needed

#
# validation instance generator
# - filling the _queue with ready to run validation batches
# - everything is defined thread local
#
Example 5: multiprocess_validation_loader

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_validation_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spacy tokenizer in a preprocessing step before training
    # (and concat the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config and _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data, _config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _tuple_loader = IrLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                               sorting_keys=[("doc_tokens", "num_tokens"), ("query_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and no longer needed
Example 6: get_multiprocess_batch_queue

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # also set so that windows & linux behave the same
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting " + str(len(files)) + " data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"], conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    _queue_list = []
    # _queue = ctx.Queue(queue_size)
    for proc_number, file in enumerate(files):
        _queue = ctx.Queue(queue_size)
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                              target=target_function,
                              args=(proc_number, conf, _queue, _finish_notification, file, fasttext_vocab_cached_mapping, fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
        _queue_list.append(_queue)

    return DeterministicQueue(_queue_list), _processes, _finish_notification
    # return _queue, _processes, _finish_notification

#
# training instance generator
# - filling the _queue with ready to run training batches
# - everything is thread local
#

Author: sebastian-hofstaetter, Project: transformer-kernel-ranking, Lines of code: 39, Source: multiprocess_input_pipeline.py
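Example 6 wraps the per-process queues in a `DeterministicQueue`, which is not shown on this page. A hypothetical sketch of such a wrapper, assuming it simply round-robins get() over the per-process queues so batches are consumed in a reproducible order regardless of which worker is fastest:

class DeterministicQueue:
    # Hypothetical sketch; the real class lives in the project's source and may differ.
    def __init__(self, queue_list):
        self.queue_list = queue_list
        self.current = 0

    def get(self):
        item = self.queue_list[self.current].get()
        self.current = (self.current + 1) % len(self.queue_list)
        return item

    def qsize(self):
        return sum(q.qsize() for q in self.queue_list)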
Example 7: sample

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def sample(env, policy, batchsz, process_num):
    """
    Given a total of batchsz tasks, batchsz is split equally across the processes,
    and when the processes return, all of the data is merged and returned.
    :param env:
    :param policy:
    :param batchsz:
    :param process_num:
    :return: batch
    """

    # batchsz will be split across the processes,
    # so the final batchsz may be larger than the batchsz parameter
    process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
    # buffer to save all data
    queue = mp.Queue()

    # start processes for pid in range(1, processnum)
    # if processnum = 1, this part will be ignored.
    # when saving a tensor in a Queue, the process should be kept alive until Queue.get(),
    # please refer to: https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847
    # however there are still problems with CUDA tensors on a multiprocessing queue,
    # please refer to: https://discuss.pytorch.org/t/cuda-tensors-on-multiprocessing-queue/28626
    # so we just convert the tensors to numpy and then put them into the queue.
    evt = mp.Event()
    processes = []
    for i in range(process_num):
        process_args = (i, queue, evt, env, policy, process_batchsz)
        processes.append(mp.Process(target=sampler, args=process_args))
    for p in processes:
        # set the process as a daemon so it is killed once the main process stops.
        p.daemon = True
        p.start()

    # we need to get the first Memory object and then merge the other Memory objects into it via its append function.
    pid0, buff0 = queue.get()
    for _ in range(1, process_num):
        pid, buff_ = queue.get()
        buff0.append(buff_)  # merge current Memory into buff0
    evt.set()

    # now buff holds all of the sampled data
    buff = buff0

    return buff.get_batch()
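Example 7 relies on a `sampler` worker function and a `Memory` buffer that are defined elsewhere. A hypothetical sketch of the worker, under the assumptions that `Memory.push` stores one transition and that `env`/`policy` expose the methods used below, could look like this:

def sampler(pid, queue, evt, env, policy, batchsz):
    # Hypothetical sketch; the real sampler and its env/policy API may differ.
    buff = Memory()
    sampled_num = 0
    while sampled_num < batchsz:
        s = env.reset()
        done = False
        while not done and sampled_num < batchsz:
            a = policy.select_action(s)            # assumed policy API
            next_s, r, done = env.step(a)          # assumed env API
            buff.push(s, a, r, next_s, done)       # store as numpy, not CUDA tensors
            s = next_s
            sampled_num += 1
    queue.put([pid, buff])                          # the parent unpacks (pid, buff)
    evt.wait()                                      # stay alive until the parent has merged all buffers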
Example 8: multiprocess_training_loader

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_training_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _triple_loader = BertTripleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                 max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"],
                                                 min_doc_length=_config["min_doc_length"], min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                                   sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

        _iterator.index_with(Vocabulary())  # .from_files(_config["vocab_directory"])

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data, _config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _triple_loader = IrTripleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                               max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"],
                                               min_doc_length=_config["min_doc_length"], min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                                   sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None)  # signal the end of the queue
    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and no longer needed

#
# validation instance generator
# - filling the _queue with ready to run validation batches
# - everything is defined thread local
#

Author: sebastian-hofstaetter, Project: transformer-kernel-ranking, Lines of code: 60, Source: multiprocess_input_pipeline.py
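Examples 8 to 10 terminate each per-process stream with a `None` sentinel. A hedged sketch of the matching consumer loop in the main process (queue, processes, exit_event, and train_step are assumed from the caller):

finished_processes = 0
while finished_processes < len(processes):
    batch = queue.get()
    if batch is None:                 # one loader reached the end of its file
        finished_processes += 1
        continue
    train_step(batch)                 # assumed training callback
exit_event.set()                      # let the workers pass _wait_for_exit.wait() and exit
for p in processes:
    p.join()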
Example 9: multiprocess_validation_loader

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_validation_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _tuple_loader = BertLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                      max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"],
                                                      min_doc_length=_config["min_doc_length"], min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("doc_tokens", "num_tokens")])

        _iterator.index_with(Vocabulary.from_files(_config["vocab_directory"]))

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data, _config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _tuple_loader = IrLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                    max_doc_length=_config["max_doc_length"], max_query_length=_config["max_query_length"],
                                                    min_doc_length=_config["min_doc_length"], min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("doc_tokens", "num_tokens"), ("query_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None)  # signal the end of the queue
    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and no longer needed

#
# single sequence loader from multiple files
#

Author: sebastian-hofstaetter, Project: transformer-kernel-ranking, Lines of code: 58, Source: multiprocess_input_pipeline.py
Example 10: multiprocess_single_sequence_loader

# Required module: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_single_sequence_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                      max_seq_length=_config["max_doc_length"], min_seq_length=_config["min_doc_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(Vocabulary.from_files(_config["vocab_directory"]))

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data, _config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                                      max_seq_length=_config["max_doc_length"], min_seq_length=_config["min_doc_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None)  # signal the end of the queue
    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and no longer needed

Author: sebastian-hofstaetter, Project: transformer-kernel-ranking, Lines of code: 52, Source: multiprocess_input_pipeline.py