

Python multiprocess.Pool Method Code Examples

This article collects typical usage examples of the multiprocess.Pool method in Python. If you are looking for concrete examples of how multiprocess.Pool is used in practice, the selected code examples below may help. You can also explore further usage examples from the multiprocess package.


The following presents 12 code examples of the multiprocess.Pool method, drawn from open-source projects and listed in order of popularity.
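
Before the examples, here is a minimal, self-contained sketch of the Pool / imap / close / join pattern that most of the examples below follow. It is not taken from any of the listed projects; the worker function and process count are purely illustrative. (multiprocess is a fork of the standard-library multiprocessing package, so Pool behaves the same way.)

from multiprocess import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    pool = Pool(4)                                # four worker processes (illustrative)
    results = list(pool.imap(square, range(10)))  # imap returns a lazy iterator; list() consumes it
    pool.close()                                  # stop accepting new tasks
    pool.join()                                   # wait for the workers to finish
    print(results)                                # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]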

Example 1: format_to_bert

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_bert(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'valid', 'test']
    for corpus_type in datasets:
        a_lst = []
        for json_f in glob.glob(pjoin(args.raw_path, '*' + corpus_type + '.*.json')):
            real_name = json_f.split('/')[-1]
            a_lst.append((corpus_type, json_f, args, pjoin(args.save_path, real_name.replace('json', 'bert.pt'))))
        print(a_lst)
        pool = Pool(args.n_cpus)
        for d in pool.imap(_format_to_bert, a_lst):
            pass

        pool.close()
        pool.join() 
Developer: nlpyang, Project: PreSumm, Lines of code: 19, Source file: data_builder.py

Example 2: format_to_bert

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_bert(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'valid', 'test']
    for corpus_type in datasets:
        a_lst = []
        for json_f in glob.glob(pjoin(args.raw_path, '*' + corpus_type + '.*.json')):
            real_name = json_f.split('/')[-1]
            a_lst.append((json_f, args, pjoin(args.save_path, real_name.replace('json', 'bert.pt'))))
        print(a_lst)
        pool = Pool(args.n_cpus)
        for d in pool.imap(_format_to_bert, a_lst):
            pass

        pool.close()
        pool.join() 
Developer: nlpyang, Project: BertSum, Lines of code: 19, Source file: data_builder.py

Example 3: format_xsum_to_lines

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_xsum_to_lines(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'test', 'valid']

    corpus_mapping = json.load(open(pjoin(args.raw_path, 'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')))

    for corpus_type in datasets:
        mapped_fnames = corpus_mapping[corpus_type]
        root_src = pjoin(args.raw_path, 'restbody')
        root_tgt = pjoin(args.raw_path, 'firstsentence')
        # realnames = [fname.split('.')[0] for fname in os.listdir(root_src)]
        realnames = mapped_fnames

        a_lst = [(root_src, root_tgt, n) for n in realnames]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_xsum_to_lines, a_lst):
            if (d is None):
                continue
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()
        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = [] 
Developer: nlpyang, Project: PreSumm, Lines of code: 40, Source file: data_builder.py

Example 4: update_catalog_collection

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def update_catalog_collection(elements, max_miller, n_processes=1, mp_query=None):
    '''
    This function will enumerate and add adsorption sites to our `catalog`
    Mongo collection.

    Args:
        elements        A list of strings indicating the elements you are
                        looking for, e.g., ['Cu', 'Al']
        max_miller      An integer indicating the maximum Miller index to be
                        enumerated
        n_processes     An integer indicating how many threads you want to use
                        when running the tasks. If you do not expect many
                        updates, stick to the default of 1, or go up to 4. If
                        you are re-creating your collection from scratch, you
                        may want to increase this argument as high as
                        you can.
        mp_query        We get our bulks from The Materials Project. This
                        dictionary argument is used as a Mongo query to The
                        Materials Project Database. If you do not supply this
                        argument, then it will automatically filter out bulks
                        whose energies above the hull are greater than 0.1 eV
                        and whose formation energy per atom is above 0 eV.
    '''
    # Python doesn't like mutable arguments
    if mp_query is None:
        mp_query = {}

    # Figure out the MPIDs we need to enumerate
    get_mpid_task = _GetMpids(elements=elements, mp_query=mp_query)
    schedule_tasks([get_mpid_task])
    mpids = get_task_output(get_mpid_task)

    # For each MPID, enumerate all the sites and then add them to our `catalog`
    # Mongo collection. Do this in parallel because it can be.
    if n_processes > 1:
        with multiprocess.Pool(n_processes) as pool:
            list(pool.imap(func=lambda mpid: __run_insert_to_catalog_task(mpid, max_miller),
                           iterable=mpids, chunksize=20))
    else:
        for mpid in mpids:
            __run_insert_to_catalog_task(mpid, max_miller) 
Developer: ulissigroup, Project: GASpy, Lines of code: 43, Source file: catalog.py

Example 5: Pool

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def Pool(n=1): # workaround
    class mpool():
        def map(self,f,xs):
            return [f(x) for x in xs]
        def terminate(self): return None # dummy function
    return mpool()
Developer: joselado, Project: quantum-honeycomp, Lines of code: 8, Source file: parallel.py

Example 6: set_cores

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def set_cores(n=1):
    global cores
    cores = n


#mainpool = None

#def initialize(): 
#  global mainpool
#  if cores>1:
#    mainpool = Pool(cores) # create pool
#  return mainpool

#def finish(): mainpool=None # delete pool 
Developer: joselado, Project: quantum-honeycomp, Lines of code: 16, Source file: parallel.py

Example 7: pcall_mp

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def pcall_mp(fun,args,cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores) # create pool
#    print("Using",cores,"cores")
    out = mainpool.map(fun,args) # return list
    mainpool.terminate() # clear the pool
    del mainpool # delete pool
    return out
#except:
#  print("Multiprocessing not found, running in a single core")
#  def pcall_mp(fun,args,cores=1): return pcall_serial(fun,args) 
Developer: joselado, Project: quantum-honeycomp, Lines of code: 13, Source file: parallel.py

Example 8: format_to_lines

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_lines(args):
    corpus_mapping = {}
    for corpus_type in ['valid', 'test', 'train']:
        temp = []
        for line in open(pjoin(args.map_path, 'mapping_' + corpus_type + '.txt')):
            temp.append(hashhex(line.strip()))
        corpus_mapping[corpus_type] = {key.strip(): 1 for key in temp}
    train_files, valid_files, test_files = [], [], []
    for f in glob.glob(pjoin(args.raw_path, '*.json')):
        real_name = f.split('/')[-1].split('.')[0]
        if (real_name in corpus_mapping['valid']):
            valid_files.append(f)
        elif (real_name in corpus_mapping['test']):
            test_files.append(f)
        elif (real_name in corpus_mapping['train']):
            train_files.append(f)
        # else:
        #     train_files.append(f)

    corpora = {'train': train_files, 'valid': valid_files, 'test': test_files}
    for corpus_type in ['train', 'valid', 'test']:
        a_lst = [(f, args) for f in corpora[corpus_type]]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_to_lines, a_lst):
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    # save.write('\n'.join(dataset))
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()
        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                # save.write('\n'.join(dataset))
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = [] 
Developer: nlpyang, Project: PreSumm, Lines of code: 46, Source file: data_builder.py

Example 9: format_to_lines

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_lines(args):
    corpus_mapping = {}
    for corpus_type in ['valid', 'test', 'train']:
        temp = []
        for line in open(pjoin(args.map_path, 'mapping_' + corpus_type + '.txt')):
            temp.append(hashhex(line.strip()))
        corpus_mapping[corpus_type] = {key.strip(): 1 for key in temp}
    train_files, valid_files, test_files = [], [], []
    for f in glob.glob(pjoin(args.raw_path, '*.json')):
        real_name = f.split('/')[-1].split('.')[0]
        if (real_name in corpus_mapping['valid']):
            valid_files.append(f)
        elif (real_name in corpus_mapping['test']):
            test_files.append(f)
        elif (real_name in corpus_mapping['train']):
            train_files.append(f)

    corpora = {'train': train_files, 'valid': valid_files, 'test': test_files}
    for corpus_type in ['train', 'valid', 'test']:
        a_lst = [(f, args) for f in corpora[corpus_type]]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_to_lines, a_lst):
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    # save.write('\n'.join(dataset))
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()
        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                # save.write('\n'.join(dataset))
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = [] 
Developer: nlpyang, Project: BertSum, Lines of code: 44, Source file: data_builder.py

Example 10: tabix

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def tabix(bins, pairs_path, cool_path, metadata, assembly, nproc, zero_based, max_split, **kwargs):
    """
    Bin a tabix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Tabix manpage: <http://www.htslib.org/doc/tabix.html>.

    """
    logger = get_logger(__name__)
    chromsizes, bins = parse_bins(bins)

    if metadata is not None:
        with open(metadata, 'r') as f:
            metadata = json.load(f)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            logger.info("Using {} cores".format(nproc))
            map = pool.imap
        else:
            map = six.moves.map

        opts = {}
        if 'chrom2' in kwargs:
            opts['C2'] = kwargs['chrom2'] - 1
        if 'pos2' in kwargs:
            opts['P2'] = kwargs['pos2'] - 1

        iterator = TabixAggregator(
            pairs_path,
            chromsizes,
            bins,
            map=map,
            is_one_based=(not zero_based),
            n_chunks=max_split,
            **opts
        )

        create_cooler(
            cool_path, bins, iterator,
            metadata=metadata,
            assembly=assembly,
            ordered=True)
    finally:
        if nproc > 1:
            pool.close() 
Developer: mirnylab, Project: cooler, Lines of code: 52, Source file: cload.py

Example 11: pairix

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def pairix(bins, pairs_path, cool_path, metadata, assembly, nproc, zero_based, max_split):
    """
    Bin a pairix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Pairix on GitHub: <https://github.com/4dn-dcic/pairix>.

    """
    logger = get_logger(__name__)
    chromsizes, bins = parse_bins(bins)

    if metadata is not None:
        with open(metadata, 'r') as f:
            metadata = json.load(f)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            logger.info("Using {} cores".format(nproc))
            map = pool.imap
        else:
            map = six.moves.map

        iterator = PairixAggregator(
            pairs_path,
            chromsizes,
            bins,
            map=map,
            is_one_based=(not zero_based),
            n_chunks=max_split)

        create_cooler(
            cool_path, bins, iterator,
            metadata=metadata,
            assembly=assembly,
            ordered=True)
    finally:
        if nproc > 1:
            pool.close() 
Developer: mirnylab, Project: cooler, Lines of code: 44, Source file: cload.py

Example 12: multimap

# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def multimap(function, inputs, chunked=False, processes=32, maxtasksperchild=1,
             chunksize=1, n_calcs=None):
    '''
    This function is a wrapper to parallelize a function.

    Args:
        function            The function you want to execute
        inputs              An iterable that yields proper arguments to the
                            function
        chunked             A Boolean indicating whether your function expects
                            single arguments or "chunked" iterables, e.g.,
                            lists.
        processes           The number of threads/processes you want to be using
        maxtasksperchild    The maximum number of tasks that a child process
                            may do before terminating (and therefore clearing
                            its memory cache to avoid memory overload).
        chunksize           How many calculations you want to have each single
                            processor do per task. Smaller chunks means more
                            memory shuffling. Bigger chunks means more RAM
                            requirements.
        n_calcs             How many calculations you have. Only necessary for
                            adding a percentage timer to the progress bar.
    Returns:
        outputs     A list of the inputs mapped through the function
    '''
    # Collect garbage before we begin multiprocessing to make sure we don't
    # pass things we don't need to
    gc.collect()

    # If we have one thread, there's no use multiprocessing
    if processes == 1:
        output = [function(input_) for input_ in tqdm(inputs, total=n_calcs)]
        return output

    with Pool(processes=processes, maxtasksperchild=maxtasksperchild) as pool:
        # Use multiprocessing to perform the calculations. We use imap instead
        # of map so that we get an iterator, which we need for tqdm (the
        # progress bar) to work. imap also requires less disk memory, which
        # can be an issue for some of our large systems.
        if not chunked:
            iterator = pool.imap(function, inputs, chunksize=chunksize)
            total = n_calcs
            outputs = list(tqdm(iterator, total=total))

        # If our function expects chunks, then we have to unpack our inputs
        # appropriately
        else:
            iterator = pool.imap(function, _chunk(inputs, n=chunksize))
            total = n_calcs / chunksize
            outputs = list(np.concatenate(list(tqdm(iterator, total=total))))

    return outputs 
Developer: ulissigroup, Project: GASpy, Lines of code: 54, Source file: utils.py


Note: The multiprocess.Pool method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective authors, who retain the copyright; consult each project's license before distributing or using the code, and do not repost without permission.