This article collects typical usage examples of the Python method multiprocessing.Manager.put. If you have been wondering what exactly Manager.put does, how it is used, or what real code that calls it looks like, the curated examples below should help. You can also read further about the class this method belongs to, multiprocessing.Manager.
The following shows 15 code examples of the Manager.put method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.

Example 1: timeout_iterator
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def timeout_iterator(iterator):
    """Wraps an iterator and makes it time out after time ``timeout``.
    Parameters
    ----------
    iterator : iterator
    Returns
    -------
    timeout_iterator : iterator
    """
    # ``timeout`` and ``QueueStop`` are module-level names in the original source.
    buffer_ = Manager().Queue()
    # The child process drains ``iterator`` into the queue via map()
    # (eager under Python 2).
    process = Process(
        target=partial(map),
        args=(buffer_.put, iterator)
    )
    process.start()
    process.join(timeout)
    process.terminate()
    buffer_.put(QueueStop())
    timeout_iterator = iter(buffer_.get, QueueStop())
    return timeout_iterator
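
A minimal usage sketch for the function above, assuming Python 2 / fork semantics (the original relies on the eager behaviour of map() and on the module-level names timeout and QueueStop); slow_numbers is an illustrative generator:

from functools import partial
from multiprocessing import Manager, Process
import time

timeout = 2  # seconds; assumed module-level constant used by timeout_iterator

class QueueStop(object):
    """Assumed sentinel class; __eq__ is needed so iter(..., QueueStop()) can stop."""
    def __eq__(self, other):
        return isinstance(other, QueueStop)

def slow_numbers():
    for i in range(10):
        time.sleep(0.5)
        yield i

for item in timeout_iterator(slow_numbers()):
    print(item)  # only the items produced before the timeout expired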

Example 2: predict
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def predict(self, x):
    """ Predict class for test set x.
    The predicted class of an input sample is computed as the majority
    prediction of the trees in the forest.
    Parameters
    ----------
    x : array-like of shape = [n_samples, n_features]
        The test input samples.
    Returns
    -------
    y_pred : array of shape = [n_samples]
        The predicted classes.
    probs : array of shape = [n_samples]
        Probability of each sample belonging to the predicted class.
    """
    if self.n_jobs == -1:
        n_workers = min(cpu_count(), self.n_trees)
    else:
        n_workers = min(self.n_jobs, self.n_trees)
    # Establish communication queues.
    tasks = Manager().JoinableQueue()
    results = Manager().Queue()
    # Start workers.
    workers = [Worker(tasks, results) for _ in xrange(n_workers)]
    for w in workers:
        w.start()
    # Populate the task queue.
    for i in xrange(self.n_trees):
        tasks.put(Task(_tree_predict, (x, self.forest[i]), i))
    # Add a poison pill for each worker.
    for i in xrange(n_workers):
        tasks.put(None)
    # Wait for all of the tasks to finish.
    tasks.join()
    # Retrieve the results, i.e. the votes of the trees, from the queue as
    # an array of shape [n_trees, n_samples].
    votes = np.array(retrieve(results, self.n_trees), int)
    # Count up the votes of the trees.
    n_classes = len(np.unique(votes))
    counts = np.apply_along_axis(
        lambda z: np.bincount(z, minlength=n_classes), 0, votes)
    # Classify each sample according to the majority of the votes.
    y_pred = np.argmax(counts, axis=0)
    return y_pred, counts / self.n_trees
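
The Worker, Task and retrieve helpers are not shown in this excerpt; a minimal sketch of what they might look like, assuming the JoinableQueue / poison-pill protocol used above:

from multiprocessing import Process

class Task(object):
    """Bundles a callable, its arguments, and an index identifying the result."""
    def __init__(self, func, args, index):
        self.func = func
        self.args = args
        self.index = index

    def __call__(self):
        return self.index, self.func(*self.args)

class Worker(Process):
    """Consumes Task objects from `tasks` until it receives a None poison pill."""
    def __init__(self, tasks, results):
        super(Worker, self).__init__()
        self.tasks = tasks
        self.results = results

    def run(self):
        while True:
            task = self.tasks.get()
            if task is None:  # poison pill: stop consuming
                self.tasks.task_done()
                break
            self.results.put(task())
            self.tasks.task_done()

def retrieve(results, n_items):
    """Collect (index, value) pairs from the results queue and order them by index."""
    pairs = [results.get() for _ in range(n_items)]
    return [value for _, value in sorted(pairs)]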

Example 3: Pool
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
class Pool(object):
    """
    The Pool class represents a pool of worker threads. It has methods
    which allow tasks to be offloaded to the worker processes in a
    few different ways.
    """
    def __init__(self, num_workers, name="Pool"):
        """
        \param num_workers (integer) number of worker threads to start
        \param name (string) prefix for the worker threads' names
        """
        self.queue = Manager().Queue()
        self.closed = False
        self.workers = []
        for idx in range(num_workers):
            process = PoolWorker(self.queue, name="%s-Worker-%d" % (name, idx))
            process.daemon = True
            try:
                process.start()
            except:
                # If one thread has a problem, undo everything
                self.terminate()
                raise
            else:
                self.workers.append(process)

    def submit(self, work_unit):
        self.queue.put(work_unit)

    def close(self):
        """Prevents any more tasks from being submitted to the
        pool. Once all the tasks have been completed the worker
        processes will exit."""
        # No lock here. We assume it's sufficiently atomic...
        self.closed = True

    def terminate(self):
        """Stops the worker processes immediately without completing
        outstanding work. When the pool object is garbage collected
        terminate() will be called immediately."""
        self.close()
        # Clear the job queue
        try:
            while 1:
                self.queue.get_nowait()
        # except Manager().Queue.empty():
        except:
            pass
        # Send one sentinel for each worker thread: each thread will die
        # eventually, leaving the next sentinel for the next thread
        for process in self.workers:
            self.queue.put(SENTINEL)
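
PoolWorker and SENTINEL are defined elsewhere in the original project; a rough sketch of the consumer side, assuming the sentinel protocol used by terminate() above and that a submitted work unit is a callable:

from multiprocessing import Process

SENTINEL = "QUIT"  # any unique, picklable marker works

class PoolWorker(Process):
    """Pulls work units off the shared queue until it sees the sentinel."""
    def __init__(self, queue, name=None):
        super(PoolWorker, self).__init__(name=name)
        self.queue = queue

    def run(self):
        while True:
            work_unit = self.queue.get()
            if work_unit == SENTINEL:  # stop marker put by Pool.terminate()
                break
            work_unit()  # assumption: a work unit is callable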

Example 4: main
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def main():
    arg_parser = argparse.ArgumentParser(description='bbd compressing program')
    arg_parser.add_argument('-compress_from_dir', type=str, default='.',
                            help='directory where needs to be compressed')
    arg_parser.add_argument('-compress_to_dir', type=str, default='.',
                            help='directory where puts compressed file')
    arg_parser.add_argument('-compress_method', default='bz2', choices=['bz2', 'gz'],
                            help='the method of compressing, '
                                 'support bz2 and gz, bz2 is default')
    arg_parser.add_argument('-compress_dir_match', default=None,
                            help='regular expressions what matches '
                                 'which directories can be compressed')
    arg_parser.add_argument('-compress_file_match', default=None,
                            help='regular expressions what matches '
                                 'which files can be compressed')
    args = arg_parser.parse_args()

    kwargs = dict()
    kwargs['compress_from_dir'] = os.path.abspath(args.compress_from_dir)
    kwargs['compress_to_dir'] = os.path.abspath(args.compress_to_dir)
    kwargs['compress_method'] = args.compress_method
    kwargs['compress_dir_match'] = args.compress_dir_match
    kwargs['compress_file_match'] = args.compress_file_match

    print('Operating parameters are as follows:')
    print('\t' + '\n\t'.join(['{}: {}'.format(k, v) for k, v in kwargs.items()]))

    if check_compress_proc_is_alive():
        return

    if kwargs['compress_from_dir'] == kwargs['compress_to_dir']:
        print(kwargs['compress_from_dir'], kwargs['compress_to_dir'])
        compress_to_dir = os.path.join(kwargs['compress_to_dir'], 'flume_compressed_data')
        kwargs['compress_to_dir'] = compress_to_dir
        os.makedirs(compress_to_dir, exist_ok=True)

    max_worker = cpu_count() if cpu_count() <= 8 else 8
    pool_cls = Pool
    compressed_queue = Manager().Queue()
    print('using multi processes to compress files')

    path_mgr = PathUtils(**kwargs)
    # Note: 'target_dir' is never set in kwargs above; the original code most
    # likely intended kwargs['compress_to_dir'] here.
    compressed_data_dir = Path(kwargs['target_dir']) / 'bbd_compressed_data_dir'
    compress_method = kwargs['compress_method']
    for file_path in path_mgr.match_need_compress_files():
        from_path = str(file_path.absolute())
        to_path = str((compressed_data_dir / file_path.name).absolute())
        compressed_queue.put((from_path, to_path, compress_method))

    if compressed_queue.empty():
        print('there is no file need to be compressed, waiting for next checking')
        return

    multi_workers(max_worker=max_worker, pool_cls=pool_cls, work=compress_file,
                  compressed_queue=compressed_queue)
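
PathUtils, compress_file, check_compress_proc_is_alive and multi_workers come from the surrounding project and are not shown; a hypothetical sketch of how multi_workers might drain the shared queue with a process pool, assuming pool_cls is multiprocessing.Pool and each queued item is a (from_path, to_path, compress_method) tuple accepted by the work function:

def multi_workers(max_worker, pool_cls, work, compressed_queue):
    """Drain the shared queue and hand each job to the worker function."""
    jobs = []
    while not compressed_queue.empty():
        jobs.append(compressed_queue.get())
    pool = pool_cls(processes=max_worker)  # assumption: pool_cls is multiprocessing.Pool
    pool.starmap(work, jobs)               # assumption: work(from_path, to_path, compress_method)
    pool.close()
    pool.join()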

Example 5: test_variant_printer
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def test_variant_printer():
    """Test the variant printer"""
    vcf_file = setup_vcf_file()
    variant_queue = Manager().Queue()
    head = HeaderParser()
    outfile = NamedTemporaryFile(mode='w+t', delete=False, suffix='.vcf')
    outfile.close()

    variant_printer = VariantPrinter(
        task_queue=variant_queue,
        head=head,
        mode='chromosome',
        outfile=outfile.name
    )
    variant_printer.start()

    batch = OrderedDict()
    for line in open(vcf_file):
        line = line.rstrip()
        if line.startswith('#'):
            if line.startswith('##'):
                head.parse_meta_data(line)
            else:
                head.parse_header_line(line)
        else:
            variant_dict = get_variant_dict(line, head.header)
            variant_id = get_variant_id(variant_dict)
            variant_dict['variant_id'] = variant_id
            variant_dict['info_dict'] = get_info_dict(variant_dict['INFO'])
            variant_queue.put(variant_dict)

    variant_queue.put(None)
    variant_printer.join()

    variants = []
    # Note: the third positional argument of the built-in open() is buffering,
    # so passing 'utf-8-sig' here presumably relies on `open` being codecs.open
    # (or io.open with encoding=) in the original module.
    with open(outfile.name, 'r', 'utf-8-sig') as f:
        for line in f:
            variants.append(line.rstrip().split('\t'))

    assert variants[0][0] == '1'
    assert variants[0][2] == '11900'

Example 6: fit
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def fit(self, x, y):
    """ Build a random forest of trees from the training set (x, y).
    Parameters
    ----------
    x : array-like of shape = [n_samples, n_features]
        The training input samples.
    y : array-like of shape = [n_samples]
        The target values (integers that correspond to classes).
    Returns
    -------
    self : object
        Returns self.
    """
    if self.n_jobs == -1:
        n_workers = min(cpu_count(), self.n_trees)
    else:
        n_workers = min(self.n_jobs, self.n_trees)
    # Establish communication queues.
    tasks = Manager().JoinableQueue()
    results = Manager().Queue()
    # Start workers.
    workers = [Worker(tasks, results) for _ in xrange(n_workers)]
    for w in workers:
        w.start()
    # Populate the task queue.
    for i in xrange(self.n_trees):
        # Create a new random state for each tree.
        random_state = np.random.RandomState(i)
        tasks.put(Task(_build_tree, (x, y, self, random_state), i))
    # Add a poison pill for each worker.
    for i in xrange(n_workers):
        tasks.put(None)
    # Wait for all of the tasks to finish.
    tasks.join()
    # Retrieve the results, i.e. the trees, from the queue.
    self.forest = retrieve(results, self.n_trees)
    return self
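
Taken together with Example 2, a hypothetical end-to-end use of this forest class might look like the following (the class name RandomForest and its constructor arguments are illustrative, not taken from the excerpt):

import numpy as np

forest = RandomForest(n_trees=10, n_jobs=-1)  # hypothetical class exposing the fit()/predict() shown here

X_train = np.random.rand(100, 5)
y_train = np.random.randint(0, 2, size=100)
X_test = np.random.rand(20, 5)

forest.fit(X_train, y_train)            # trees are built in parallel worker processes
y_pred, probs = forest.predict(X_test)  # trees vote in parallel worker processes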

Example 7: main
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def main(args):
    args = parse_args()

    start_message = """
    Command line: {cmd}
    Starting SVM analysis.
    Looking at {nbFiles} files.
    Forward mapped strand: {forward}.
    Using weights: {weights}.
    Iterations: {iter}.
    Train/test split: {train_test}.
    Kernel: {kernel}
    Output to: {out}""".format(nbFiles=args.nb_files, forward=args.forward, weights=args.weighted,
                               iter=args.iter, train_test=args.split, kernel=args.kernel, out=args.out,
                               cmd=" ".join(sys.argv[:]))

    print >> sys.stderr, start_message

    motifs = [747, 354, 148, 796, 289, 363, 755, 626, 813, 653, 525, 80, 874]

    workers = args.jobs
    work_queue = Manager().Queue()
    done_queue = Manager().Queue()
    jobs = []

    for motif in motifs:
        svm_args = {
            "c_files": args.c_files,
            "mc_files": args.mc_files,
            "hmc_files": args.hmc_files,
            "weighted": args.weighted,
            "forward": args.forward,
            "ref_start": motif,
            "train_test_split": args.split,
            "iterations": args.iter,
            "out_path": args.out,
            "kernel": args.kernel,
            "max_samples": args.nb_files,
            "C": args.C
        }
        work_queue.put(svm_args)

    for w in xrange(workers):
        p = Process(target=run_svm, args=(work_queue, done_queue))
        p.start()
        jobs.append(p)
        work_queue.put('STOP')

    for p in jobs:
        p.join()

    done_queue.put('STOP')

    print >> sys.stderr, "\n\tFinished SVM"

Example 8: main
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def main(args):
    args = parse_args()

    if args.ref is not None:
        reference_sequence = get_reference_sequence(args.ref)
    else:
        reference_sequence = None

    alns, forward_mask = get_alignments_labels_and_mask(args.in_files, args.n)
    out_file = args.out

    if args.positions is not None:
        positions = {}
        f, b = parse_substitution_file(args.positions)
        positions['forward'] = f[1]
        positions['backward'] = b[1]
    else:
        assert reference_sequence is not None, "Need to provide reference sequence if not providing positions"
        positions = None

    workers = args.nb_jobs
    work_queue = Manager().Queue()
    done_queue = Manager().Queue()
    jobs = []

    for aln, forward_bool in zip(alns, forward_mask):
        call_methyl_args = {
            "sequence": reference_sequence,
            "alignment_file": aln,
            "out_file": out_file,
            "positions": positions,
            "degenerate_type": degenerate_enum(args.degenerate),
            "threshold": args.threshold,
            "kmer_length": args.kmer_length,
        }
        #c = CallMethylation(**call_methyl_args)
        #c.write()
        work_queue.put(call_methyl_args)

    for w in xrange(workers):
        p = Process(target=run_methyl_caller, args=(work_queue, done_queue))
        p.start()
        jobs.append(p)
        work_queue.put('STOP')

    for p in jobs:
        p.join()

    done_queue.put('STOP')

Example 9: run_service
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def run_service(service, service_iterable, service_arguments, workers, iterable_argument):
    # set up workers for multiprocessing
    work_queue = Manager().Queue()
    done_queue = Manager().Queue()
    jobs = []

    for x in service_iterable:
        args = dict({iterable_argument: x},
                    **service_arguments)
        work_queue.put(args)

    for w in xrange(workers):
        p = Process(target=service, args=(work_queue, done_queue))
        p.start()
        jobs.append(p)
        work_queue.put('STOP')

    for p in jobs:
        p.join()

    done_queue.put('STOP')
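
A hypothetical call of run_service, reusing the 'STOP'-sentinel consumer style sketched after Example 7 (square_service and its arguments are illustrative, not part of the original project):

def square_service(work_queue, done_queue):
    """Worker: square each value pulled from the queue until 'STOP' is seen."""
    for args in iter(work_queue.get, 'STOP'):
        done_queue.put(args["value"] ** args["exponent"])

run_service(service=square_service,
            service_iterable=range(10),          # one work item per element
            service_arguments={"exponent": 2},   # shared keyword arguments
            workers=4,
            iterable_argument="value")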

Example 10: TestVariant
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
class TestVariant(object):
    def __init__(self, name, compile_flags=[], variant_not_tags=[]):
        self.name = name
        self.compile_flags = \
            ['-WERExceptionSupport', '-ExtendedErrorStackForTestHost',
             '-BaselineMode'] + compile_flags
        self._compile_flags_has_expansion = self._has_expansion(compile_flags)
        self.tags = tags.copy()
        self.not_tags = not_tags.union(variant_not_tags).union(
            ['{}_{}'.format(x, name) for x in ('fails', 'exclude')])

        self.msg_queue = Manager().Queue()  # messages from multiple processes
        self.test_result = TestResult()
        self.test_count = 0
        self._print_lines = []  # _print lines buffer
        self._last_len = 0

    @staticmethod
    def _has_expansion(flags):
        return any(re.match('.*\${.*}', f) for f in flags)

    @staticmethod
    def _expand(flag, test):
        return re.sub('\${id}', str(test.id), flag)

    def _expand_compile_flags(self, test):
        if self._compile_flags_has_expansion:
            return [self._expand(flag, test) for flag in self.compile_flags]
        return self.compile_flags

    # check if this test variant should run a given test
    def _should_test(self, test):
        tags = split_tags(test.get('tags'))
        if not tags.isdisjoint(self.not_tags):
            return False
        if self.tags and not self.tags.issubset(tags):
            return False
        if not_compile_flags:  # exclude unsupported compile-flags if any
            flags = test.get('compile-flags')
            if flags and \
                    not not_compile_flags.isdisjoint(flags.lower().split()):
                return False
        return True

    # print output from a multi-process run, to be sent with the result message
    def _print(self, line):
        self._print_lines.append(str(line))

    # queue a test result from multi-process runs
    def _log_result(self, test, fail):
        output = '\n'.join(self._print_lines)  # collect buffered _print output
        self._print_lines = []
        self.msg_queue.put((test.filename, fail, test.elapsed_time, output))

    # (on main process) process one queued message
    def _process_msg(self, msg):
        filename, fail, elapsed_time, output = msg
        self.test_result.log(filename, fail=fail)
        line = '[{}/{} {:4.2f}] {} -> {}'.format(
            self.test_result.total_count(),
            self.test_count,
            elapsed_time,
            'Failed' if fail else 'Passed',
            self._short_name(filename))
        padding = self._last_len - len(line)
        print(line + ' ' * padding, end='\n' if fail else '\r')
        log_message(line)
        self._last_len = len(line) if not fail else 0
        if len(output) > 0:
            print_and_log(output)

    # get a shorter test file path for display only
    def _short_name(self, filename):
        folder = os.path.basename(os.path.dirname(filename))
        return os.path.join(folder, os.path.basename(filename))

    # (on main process) wait for and process one queued message
    def _process_one_msg(self):
        self._process_msg(self.msg_queue.get())

    # log a failed test with details
    def _show_failed(self, test, flags, exit_code, output,
                     expected_output=None, timedout=False):
        if timedout:
            if warn_on_timeout:
                self._print('WARNING: Test timed out!')
            else:
                self._print('ERROR: Test timed out!')
        self._print('{} {} {}'.format(binary, ' '.join(flags), test.filename))
        if expected_output == None or timedout:
            self._print("\nOutput:")
            self._print("----------------------------")
            self._print(output.decode('utf-8'))
            self._print("----------------------------")
        else:
            lst_output = output.split(b'\n')
            lst_expected = expected_output.split(b'\n')
            ln = min(len(lst_output), len(lst_expected))
            for i in range(0, ln):
                if lst_output[i] != lst_expected[i]:
#......... part of the code omitted .........
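
The dispatch code is omitted from the excerpt above; a hypothetical driver showing the intended division of labour, where worker processes finish each test by calling _log_result() and the main process drains msg_queue with _process_one_msg() (the `tests` list and the body of run_one are placeholders, not taken from the original):

from multiprocessing import Pool

variant = TestVariant('interpreted')
variant.test_count = len(tests)  # `tests` is the list of discovered test objects

def run_one(job):
    variant, test = job
    # ... run the test binary, buffering its output with variant._print(...) ...
    variant._log_result(test, fail=False)

pool = Pool()
pool.map_async(run_one, [(variant, test) for test in tests])

# Main process: consume one result message per test and print live progress.
for _ in range(variant.test_count):
    variant._process_one_msg()
pool.close()
pool.join()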

Example 11: __init__
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
class ConcurrentPandas:
    """
    Concurrent Pandas is a class for concurrent asynchronous data downloads
    from a variety of sources using either threads or processes.
    """
    def __init__(self):
        self.output_map = Manager().dict()
        self.input_queue = Manager().Queue()
        self.data_worker = None
        self.worker_args = None
        self.source_name = None

    def consume_keys(self):
        """
        Work through the keys to look up sequentially
        """
        print("\nLooking up " + self.input_queue.qsize().__str__() + " keys from " + self.source_name + "\n")
        self.data_worker(**self.worker_args)

    def consume_keys_asynchronous_processes(self):
        """
        Work through the keys to look up asynchronously using multiple processes
        """
        print("\nLooking up " + self.input_queue.qsize().__str__() + " keys from " + self.source_name + "\n")
        jobs = multiprocessing.cpu_count()*4 if (multiprocessing.cpu_count()*4 < self.input_queue.qsize()) \
            else self.input_queue.qsize()
        pool = multiprocessing.Pool(processes=jobs, maxtasksperchild=10)
        for x in range(jobs):
            pool.apply(self.data_worker, [], self.worker_args)
        pool.close()
        pool.join()

    def consume_keys_asynchronous_threads(self):
        """
        Work through the keys to look up asynchronously using multiple threads
        """
        print("\nLooking up " + self.input_queue.qsize().__str__() + " keys from " + self.source_name + "\n")
        jobs = multiprocessing.cpu_count()*4 if (multiprocessing.cpu_count()*4 < self.input_queue.qsize()) \
            else self.input_queue.qsize()
        pool = ThreadPool(jobs)
        for x in range(jobs):
            pool.apply(self.data_worker, [], self.worker_args)
        pool.close()
        pool.join()

    def return_map(self):
        """
        Return hashmap consisting of key string -> data frame
        """
        return self.output_map

    def return_input_queue(self):
        """
        Return input Queue
        """
        return self.input_queue

    def insert_keys(self, *args):
        """
        Unpack each key and add it to the queue
        """
        for key in args:
            self.unpack(key)

    def unpack(self, to_unpack):
        """
        Unpack is a recursive function that will unpack anything that inherits
        from abstract base class Container provided it is not also inheriting from Python basestring.
        Raise Exception if the resulting object is neither a container nor a string.
        Code works in both Python 2 and Python 3.
        """
        # Python 3 lacks the basestring type, work around that below
        try:
            isinstance(to_unpack, basestring)
        except NameError:
            basestring = str

        # Base Case
        if isinstance(to_unpack, basestring):
            self.input_queue.put(to_unpack)
            return

        for possible_key in to_unpack:
            if isinstance(possible_key, basestring):
                self.input_queue.put(possible_key)
            elif sys.version_info >= (3, 0):
                if isinstance(possible_key, collections.abc.Container) and not isinstance(possible_key, basestring):
                    self.unpack(possible_key)
            else:
                raise Exception("A type that is neither a string or a container was passed to unpack. "
                                "Aborting!")
#......... part of the code omitted .........
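
A hypothetical usage of the class above; set_source_yahoo_finance stands in for whatever method of the full class assigns data_worker, worker_args and source_name:

cp = ConcurrentPandas()
cp.set_source_yahoo_finance()                  # illustrative: configures data_worker/worker_args
cp.insert_keys("AAPL", ["GOOG", ("MSFT",)])    # strings and nested containers are both accepted
cp.consume_keys_asynchronous_threads()         # drain input_queue with a thread pool

frames = cp.return_map()                       # Manager().dict() mapping key -> pandas DataFrame
print(frames.keys())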

Example 12: main
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def main(argv):
    # parse command line arguments
    args = parse_args()

    # build the HDP if that's what we're doing
    if args.buildHDP is not None:
        assert (None not in [args.buildHDP, args.templateHDP, args.complementHDP, args.buildAlignments])
        build_hdp(hdp_type=args.buildHDP, template_hdp_path=args.templateHDP, complement_hdp_path=args.complementHDP,
                  alignments=args.buildAlignments)

    start_message = """\n
    Starting Baum-Welch training.
    Directories with training files: {files_dir}
    Training on {amount} bases.
    Using reference sequence: {ref}
    Input template/complement models: {inTHmm}/{inCHmm}
    Writing trained models to: {outLoc}
    Performing {iterations} iterations.
    Using model: {model}
    Using HDPs: {thdp} / {chdp}
    \n
    """.format(files_dir=args.files_dir, amount=args.amount, ref=args.ref,
               inTHmm=args.in_T_Hmm, inCHmm=args.in_C_Hmm, outLoc=args.out,
               iterations=args.iter, model=args.stateMachineType, thdp=args.templateHDP, chdp=args.complementHDP)

    assert (args.files_dir is not None), "Need to specify which files to train on"
    assert (args.ref is not None), "Need to provide a reference file"
    assert (args.out is not None), "Need to know the working directory for training"

    print(start_message, file=sys.stdout)

    if not os.path.isfile(args.ref):  # TODO make this is_fasta(args.ref)
        print("Did not find valid reference file", file=sys.stderr)
        sys.exit(1)

    # make a directory to put the files we're using
    working_folder = FolderHandler()
    working_directory_path = working_folder.open_folder(args.out + "tempFiles_expectations")
    reference_seq = working_folder.add_file_path("reference_seq.txt")
    make_temp_sequence(args.ref, True, reference_seq)

    # index the reference for bwa
    print("signalAlign - indexing reference", file=sys.stderr)
    bwa_ref_index = get_bwa_index(args.ref, working_directory_path)
    print("signalAlign - indexing reference, done", file=sys.stderr)

    # make model objects, these handle normalizing, loading, and writing
    template_model = get_model(type=args.stateMachineType, symbol_set_size=4096, threshold=args.threshold)
    complement_model = get_model(type=args.stateMachineType, symbol_set_size=4096, threshold=args.threshold)

    # get the input HDP, if we're using it
    if args.stateMachineType == "threeStateHdp":
        assert (args.templateHDP is not None) and (args.complementHDP is not None), \
            "Need to provide serialized HDP files for this stateMachineType"
        assert (os.path.isfile(args.templateHDP)) and (os.path.isfile(args.complementHDP)), \
            "Could not find the HDP files"

    # make some paths to files to hold the HMMs
    template_hmm = working_folder.add_file_path("template_trained.hmm")
    complement_hmm = working_folder.add_file_path("complement_trained.hmm")

    print("Starting {iterations} iterations.\n\n\t Running likelihoods\ni\tTemplate\tComplement".format(
        iterations=args.iter), file=sys.stdout)

    for i in xrange(args.iter):
        # if we're just starting, there are no HMMs
        if i == 0:
            in_template_hmm = None
            in_complement_hmm = None
        else:
            in_template_hmm = template_hmm
            in_complement_hmm = complement_hmm

        # if we're using 'mutated' or non-canonical reference sequences, they come in a list. if we're not, then
        # we make a list of the 'normal' reference sequence
        if args.cytosine_sub is None:
            cytosine_substitutions = [None] * len(args.files_dir)
        else:
            cytosine_substitutions = args.cytosine_sub

        # first cull a set of files to get expectations on
        training_files_and_substitutions = cull_training_files(args.files_dir, cytosine_substitutions, args.amount)

        # setup
        workers = args.nb_jobs
        work_queue = Manager().Queue()
        done_queue = Manager().Queue()
        jobs = []

        # get expectations for all the files in the queue
        for fast5, sub in training_files_and_substitutions:
            alignment_args = {
                "in_fast5": fast5,
                "reference": reference_seq,
                "cytosine_substitution": sub,
                "destination": working_directory_path,
                "stateMachineType": args.stateMachineType,
                "banded": args.banded,
                "bwa_index": bwa_ref_index,
                "in_templateHmm": in_template_hmm,
#......... part of the code omitted .........

Example 13: main
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
def main():
    parser = argparse.ArgumentParser(description="Parse different kind of ped files.")

    parser.add_argument('family_file',
                        type=str, nargs=1,
                        help='A pedigree file. Default is cmms format.'
                        )
    parser.add_argument('variant_file',
                        type=str, nargs=1,
                        help='A variant file. Default is vcf format'
                        )
    parser.add_argument('-o', '--outfile',
                        type=str, nargs=1, default=[None],
                        help='Specify the path to output, if no file specified the output will be printed to screen.'
                        )
    parser.add_argument('--version',
                        action="version", version=pkg_resources.require("Mip_Family_Analysis")[0].version
                        )
    parser.add_argument('-v', '--verbose',
                        action="store_true",
                        help='Increase output verbosity.'
                        )
    parser.add_argument('-cmms', '--cmms',
                        action="store_true",
                        help='If run with cmms specific structure.'
                        )
    parser.add_argument('-s', '--silent',
                        action="store_true",
                        help='Do not print the variants.'
                        )
    parser.add_argument('-pos', '--position',
                        action="store_true",
                        help='If output should be sorted by position. Default is sorted on rank score.'
                        )
    parser.add_argument('-tres', '--treshold',
                        type=int, nargs=1,
                        help='Specify the lowest rank score to be outputted.'
                        )

    args = parser.parse_args()

    var_file = args.variant_file[0]
    file_name, file_extension = os.path.splitext(var_file)

    # Print program version to stderr:
    sys.stderr.write('Version: %s \n' % str(pkg_resources.require("Mip_Family_Analysis")[0].version))

    start_time_analysis = datetime.now()

    # Start by parsing the pedigree file:
    my_family = get_family(args)
    preferred_models = my_family.models_of_inheritance

    # Take care of the headers from the variant file:
    head = get_header(var_file)
    check_individuals(my_family, head, args)
    add_cmms_metadata(head)

    # The variant queue is just a queue with split variant lines:
    variant_queue = JoinableQueue(maxsize=1000)
    # The consumers will put their results in the results queue
    results = Manager().Queue()

    # Create a temporary file for the variants:
    temp_file = NamedTemporaryFile(delete=False)

    if args.verbose:
        print('Temp files: %s' % temp_file.name)

    num_model_checkers = (cpu_count()*2-1)

    if args.verbose:
        print('Number of cpus: %s' % str(cpu_count()))

    model_checkers = [variant_consumer.VariantConsumer(variant_queue, results, my_family, args.verbose)
                      for i in xrange(num_model_checkers)]

    for w in model_checkers:
        w.start()

    var_printer = variant_printer.VariantPrinter(results, temp_file, head, args.verbose)
    var_printer.start()

    var_parser = variant_parser.VariantFileParser(var_file, variant_queue, head, args.verbose)
    var_parser.parse()

    for i in xrange(num_model_checkers):
        variant_queue.put(None)

    variant_queue.join()
    results.put(None)
    var_printer.join()

    if args.verbose:
        print('Models checked!')
        print('Start sorting the variants: \n')

    start_time_variant_sorting = datetime.now()
#......... part of the code omitted .........

Example 14: MultiProcessFile
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
class MultiProcessFile(object):
    """
    helper for testing multiprocessing

    multiprocessing poses a problem for doctests, since the strategy
    of replacing sys.stdout/stderr with file-like objects then
    inspecting the results won't work: the child processes will
    write to the objects, but the data will not be reflected
    in the parent doctest-ing process.

    The solution is to create file-like objects which will interact with
    multiprocessing in a more desirable way.

    All processes can write to this object, but only the creator can read.
    This allows the testing system to see a unified picture of I/O.
    """
    def __init__(self):
        # per advice at:
        # http://docs.python.org/library/multiprocessing.html#all-platforms
        self.__master = getpid()
        self.__queue = Manager().Queue()
        self.__buffer = StringIO()
        self.softspace = 0

    def buffer(self):
        if getpid() != self.__master:
            return

        from Queue import Empty
        from collections import defaultdict
        cache = defaultdict(str)
        while True:
            try:
                pid, data = self.__queue.get_nowait()
            except Empty:
                break
            if pid == ():
                # show parent output after children
                # this is what users see, usually
                pid = (1e100,)  # googol!
            cache[pid] += data
        for pid in sorted(cache):
            #self.__buffer.write('%s wrote: %r\n' % (pid, cache[pid]))  # DEBUG
            self.__buffer.write(cache[pid])

    def write(self, data):
        # note that these pids are in the form of current_process()._identity
        # rather than OS pids
        from multiprocessing import current_process
        pid = current_process()._identity
        self.__queue.put((pid, data))

    def __iter__(self):
        "getattr doesn't work for iter()"
        self.buffer()
        return self.__buffer

    def seek(self, offset, whence=0):
        self.buffer()
        return self.__buffer.seek(offset, whence)

    def getvalue(self):
        self.buffer()
        return self.__buffer.getvalue()

    def __getattr__(self, attr):
        return getattr(self.__buffer, attr)
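
A minimal sketch of how this helper might be used to capture output from both the parent and a child process; it assumes Python 2 (the class imports Queue.Empty) and a fork start method, so the replaced sys.stdout is inherited by the child:

import sys
from multiprocessing import Process

def worker():
    print("hello from a child process")  # goes through MultiProcessFile.write

captured = MultiProcessFile()
old_stdout, sys.stdout = sys.stdout, captured
try:
    print("hello from the parent")
    p = Process(target=worker)
    p.start()
    p.join()
finally:
    sys.stdout = old_stdout

print(captured.getvalue())  # unified output; parent output is shown after the children's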

Example 15: models
# Required import: from multiprocessing import Manager [as alias]
# Or: from multiprocessing.Manager import put [as alias]
#......... part of the code omitted .........
    variant_queue = JoinableQueue(maxsize=1000)
    logger.debug("Setting up a Queue for storing results from workers")
    results = Manager().Queue()

    num_model_checkers = processes
    # Adapt the number of processes to the machine that runs the analysis
    logger.info('Number of CPU:s {}'.format(cpu_count()))
    logger.info('Number of model checkers: {}'.format(num_model_checkers))

    # These are the workers that do the heavy part of the analysis
    logger.info('Setting up the workers')
    model_checkers = [
        VariantAnnotator(
            task_queue=variant_queue,
            results_queue=results,
            families=families,
            individuals=analysis_individuals,
            phased=phased,
            strict=strict,
            whole_gene=whole_gene,
            vep=vep,
            reduced_penetrance_genes=reduced_penetrance_genes
        )
        for i in range(num_model_checkers)
    ]

    logger.info('Starting the workers')
    for worker in model_checkers:
        logger.debug('Starting worker {0}'.format(worker))
        worker.start()

    # This process prints the variants to temporary files
    logger.info('Setting up the variant printer')
    if len(model_checkers) == 1:
        print_headers(head=head, outfile=outfile, silent=silent)
        variant_printer = VariantPrinter(
            task_queue=results,
            head=head,
            mode='normal',
            outfile=outfile
        )
    else:
        # We use a temp file to store the processed variants
        logger.debug("Build a tempfile for printing the variants")
        if temp_dir:
            temp_file = NamedTemporaryFile(delete=False, dir=temp_dir)
        else:
            temp_file = NamedTemporaryFile(delete=False)
        temp_file.close()

        variant_printer = VariantPrinter(
            task_queue=results,
            head=head,
            mode='chromosome',
            outfile=temp_file.name
        )

    logger.info('Starting the variant printer process')
    variant_printer.start()

    start_time_variant_parsing = datetime.now()

    # This process parses the original vcf and creates batches to put in the variant queue:
    logger.info('Start parsing the variants')
    chromosome_list = get_batches(
        variants=variant_file,
        batch_queue=variant_queue,
        header=head,
        vep=vep,
        annotation_keyword=keyword
    )

    logger.debug("Put stop signs in the variant queue")
    for i in range(num_model_checkers):
        variant_queue.put(None)

    variant_queue.join()
    results.put(None)
    variant_printer.join()

    if len(model_checkers) > 1:
        sort_variants(infile=temp_file.name, mode='chromosome')
        print_headers(head=head, outfile=outfile, silent=silent)
        with open(temp_file.name, 'r', encoding='utf-8') as f:
            for line in f:
                print_variant(
                    variant_line=line,
                    outfile=outfile,
                    mode='modified',
                    silent=silent
                )

        logger.debug("Removing temp file")
        os.remove(temp_file.name)
        logger.debug("Temp file removed")

    logger.info('Time for whole analysis: {0}'.format(
        str(datetime.now() - start_time_analysis)))