

Python multiprocessing.Event Method Code Examples

This article collects typical usage examples of the torch.multiprocessing.Event method in Python. If you are wondering how exactly Python's multiprocessing.Event is used, or are looking for practical examples of it, the curated code examples here may help. You can also explore further usage examples from its containing module, torch.multiprocessing.


Below are 10 code examples of the multiprocessing.Event method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
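
Most of the examples below share the same basic pattern: an Event created in the parent is handed to worker processes (or a worker thread) together with a queue, and is used to signal when the workers may exit. As a quick orientation, here is a minimal, self-contained sketch of that pattern; the worker function and its payload are illustrative assumptions and do not come from any of the projects listed below.

import torch.multiprocessing as mp

def worker(rank, queue, done_event):
    # put a result into the queue, then stay alive until the parent
    # signals that everything has been consumed
    queue.put(rank)
    done_event.wait()

if __name__ == '__main__':
    ctx = mp.get_context('spawn')   # 'spawn' behaves the same on Windows and Linux
    queue = ctx.Queue()
    done_event = ctx.Event()
    processes = [ctx.Process(target=worker, args=(i, queue, done_event)) for i in range(2)]
    for p in processes:
        p.start()
    results = [queue.get() for _ in processes]  # drain the queue while the workers are still alive
    done_event.set()                            # now the workers may exit
    for p in processes:
        p.join()
    print(results)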

Example 1: __init__

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def __init__(self, fp16=False, mean=(0., 0., 0.), std=(1., 1., 1.), pin_memory=True, **kwargs):
        super().__init__(**kwargs)
        print('Using DALI CPU iterator')
        self.stream = torch.cuda.Stream()

        self.fp16 = fp16
        self.mean = torch.tensor(mean).cuda().view(1, 3, 1, 1)
        self.std = torch.tensor(std).cuda().view(1, 3, 1, 1)
        self.pin_memory = pin_memory

        if self.fp16:
            self.mean = self.mean.half()
            self.std = self.std.half()

        self.proc_next_input = Event()
        self.done_event = Event()
        self.output_queue = queue.Queue(maxsize=5)
        self.preproc_thread = threading.Thread(
            target=_preproc_worker,
            kwargs={'dali_iterator': self._dali_iterator, 'cuda_stream': self.stream, 'fp16': self.fp16, 'mean': self.mean, 'std': self.std, 'proc_next_input': self.proc_next_input, 'done_event': self.done_event, 'output_queue': self.output_queue, 'pin_memory': self.pin_memory})
        self.preproc_thread.daemon = True
        self.preproc_thread.start()

        self.proc_next_input.set() 
Author: yaysummeriscoming | Project: DALI_pytorch_demo | Lines: 26 | Source file: dali.py

Example 2: run_in_process_group

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # start one worker process per rank
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3  # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results 
Author: facebookresearch | Project: ClassyVision | Lines: 31 | Source file: generic_distributed_util_test.py

Example 3: get_multiprocess_batch_queue

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # use 'spawn' explicitly so that Windows & Linux behave the same
    _queue = ctx.Queue(queue_size)
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting "+str(len(files))+" data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"],conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    for proc_number, file in enumerate(files):
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                             target=target_function,
                             args=(proc_number, conf, _queue, _finish_notification, file,fasttext_vocab_cached_mapping,fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
    return _queue, _processes, _finish_notification


#
# training instance generator
#   - filling the _queue with ready to run training batches
#   - everything is thread local
# 
Author: sebastian-hofstaetter | Project: sigir19-neural-ir | Lines: 35 | Source file: multiprocess_input_pipeline.py

Example 4: multiprocess_training_loader

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_training_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file,_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spaCy tokenizer in a preprocessing step before training
    # (and concatenate the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data,_config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _triple_loader = IrTripleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                               sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file), num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore

#
# validation instance generator
#   - filling the _queue with ready to run validation batches
#   - everything is defined thread local
# 
Author: sebastian-hofstaetter | Project: sigir19-neural-ir | Lines: 41 | Source file: multiprocess_input_pipeline.py

Example 5: multiprocess_validation_loader

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_validation_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file,_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spaCy tokenizer in a preprocessing step before training
    # (and concatenate the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config and _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data,_config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _tuple_loader = IrLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                               sorting_keys=[("doc_tokens", "num_tokens"), ("query_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore 
Author: sebastian-hofstaetter | Project: sigir19-neural-ir | Lines: 35 | Source file: multiprocess_input_pipeline.py

Example 6: get_multiprocess_batch_queue

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # use 'spawn' explicitly so that Windows & Linux behave the same
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting "+str(len(files))+" data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"],conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    _queue_list = []
    #_queue = ctx.Queue(queue_size)
    for proc_number, file in enumerate(files):
        _queue = ctx.Queue(queue_size)
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                             target=target_function,
                             args=(proc_number, conf, _queue, _finish_notification, file,fasttext_vocab_cached_mapping,fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
        _queue_list.append(_queue)
    return DeterministicQueue(_queue_list), _processes, _finish_notification
    #return _queue, _processes, _finish_notification


#
# training instance generator
#   - filling the _queue with ready to run training batches
#   - everything is thread local
# 
Author: sebastian-hofstaetter | Project: transformer-kernel-ranking | Lines: 39 | Source file: multiprocess_input_pipeline.py

Example 7: sample

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def sample(env, policy, batchsz, process_num):
    """
    Given batchsz number of task, the batchsz will be splited equally to each processes
    and when processes return, it merge all data and return
	:param env:
	:param policy:
    :param batchsz:
	:param process_num:
    :return: batch
    """

    # batchsz is split across the processes,
    # so the final total may be slightly larger than the batchsz parameter
    process_batchsz = np.ceil(batchsz / process_num).astype(np.int32)
    # buffer to save all data
    queue = mp.Queue()

    # start one sampler process per pid
    # when saving tensors in a Queue, the producing process must stay alive until Queue.get(),
    # see: https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847
    # there are also remaining problems with CUDA tensors on a multiprocessing queue,
    # see: https://discuss.pytorch.org/t/cuda-tensors-on-multiprocessing-queue/28626
    # so the sampler converts tensors to numpy before putting them into the queue
    # (see the standalone sketch after this example)
    evt = mp.Event()
    processes = []
    for i in range(process_num):
        process_args = (i, queue, evt, env, policy, process_batchsz)
        processes.append(mp.Process(target=sampler, args=process_args))
    for p in processes:
        # set the process as a daemon so it is killed once the main process stops
        p.daemon = True
        p.start()

    # get the first Memory object, then merge the other Memory objects into it via its append method
    pid0, buff0 = queue.get()
    for _ in range(1, process_num):
        pid, buff_ = queue.get()
        buff0.append(buff_)  # merge current Memory into buff0
    evt.set()

    # now buff saves all the sampled data
    buff = buff0

    return buff.get_batch() 
Author: thu-coai | Project: tatk | Lines: 47 | Source file: example_train.py
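
The numpy workaround mentioned in the comments of Example 7 can be isolated into a small sketch. The sampler below is a hypothetical stand-in, not the sampler used in the tatk project: it converts each tensor to a numpy array before putting it on the queue, and waits on the Event so the queue can be drained safely before the workers exit.

import torch
import torch.multiprocessing as mp

def sampler_stub(pid, queue, evt):
    # hypothetical worker: convert the (possibly CUDA) tensor to a numpy array
    # before queuing it, to avoid sharing CUDA tensors across processes
    data = torch.randn(4, 3)             # stand-in for sampled experience
    queue.put((pid, data.cpu().numpy()))
    evt.wait()                           # stay alive until the parent has drained the queue

if __name__ == '__main__':
    queue = mp.Queue()
    evt = mp.Event()
    processes = [mp.Process(target=sampler_stub, args=(i, queue, evt), daemon=True) for i in range(2)]
    for p in processes:
        p.start()
    batches = [queue.get() for _ in processes]
    evt.set()
    for p in processes:
        p.join()
    tensors = [torch.from_numpy(arr) for _, arr in batches]  # back to tensors in the parent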

Example 8: multiprocess_training_loader

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_training_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file,_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _triple_loader = BertTripleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                               max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"],
                                               min_doc_length=_config["min_doc_length"],min_query_length=_config["min_query_length"])
    
        _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                                   sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])
    
        _iterator.index_with(Vocabulary())#.from_files(_config["vocab_directory"]))

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data,_config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _triple_loader = IrTripleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                               max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"],
                                               min_doc_length=_config["min_doc_length"],min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                                   sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file), num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None) # signal end of queue

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore

#
# validation instance generator
#   - filling the _queue with ready to run validation batches
#   - everything is defined thread local
# 
Author: sebastian-hofstaetter | Project: transformer-kernel-ranking | Lines: 60 | Source file: multiprocess_input_pipeline.py

Example 9: multiprocess_validation_loader

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_validation_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file,_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _tuple_loader = BertLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                                      max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"],
                                                      min_doc_length=_config["min_doc_length"],min_query_length=_config["min_query_length"])
    
        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("doc_tokens", "num_tokens")])
    
        _iterator.index_with(Vocabulary.from_files(_config["vocab_directory"]))

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data,_config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _tuple_loader = IrLabeledTupleDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                                    max_doc_length=_config["max_doc_length"],max_query_length=_config["max_query_length"],
                                                    min_doc_length=_config["min_doc_length"],min_query_length=_config["min_query_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("doc_tokens", "num_tokens"), ("query_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None) # signal end of queue

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore

#
# single sequence loader from multiple files 
# 
Author: sebastian-hofstaetter | Project: transformer-kernel-ranking | Lines: 58 | Source file: multiprocess_input_pipeline.py

Example 10: multiprocess_single_sequence_loader

# Required import: from torch import multiprocessing [as alias]
# Or: from torch.multiprocessing import Event [as alias]
def multiprocess_single_sequence_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event, _local_file,_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                                max_seq_length= _config["max_doc_length"], min_seq_length=_config["min_doc_length"],)

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(Vocabulary.from_files(_config["vocab_directory"]))

    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,_fasttext_vocab_cached_data,_config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer,token_indexers=_token_indexers, 
                                                    max_seq_length= _config["max_doc_length"], min_seq_length=_config["min_doc_length"],)

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.put(None) # signal end of queue

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore 
Author: sebastian-hofstaetter | Project: transformer-kernel-ranking | Lines: 52 | Source file: multiprocess_input_pipeline.py


Note: The torch.multiprocessing.Event method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers; the source code copyright belongs to the original authors, and distribution and use should follow each project's license. Do not reproduce without permission.