

Python toolz.partition_all Method Code Examples

This article collects typical usage examples of the toolz.partition_all method in Python. If you are wondering how toolz.partition_all is used, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the toolz package that the method belongs to.


The following presents 11 code examples of the toolz.partition_all method, sorted by popularity by default.
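
Before diving into the project examples, here is a minimal sketch of the method itself: partition_all(n, seq) lazily splits any iterable into tuples of at most n elements, and unlike toolz.partition it keeps the final, possibly shorter tuple instead of dropping or padding it.

from toolz import partition_all

# tuples of at most 2 elements; the last tuple may be shorter
list(partition_all(2, [1, 2, 3, 4, 5]))
# [(1, 2), (3, 4), (5,)]

# it is lazy, so it works well for chunking large streams
for chunk in partition_all(3, range(10)):
    print(chunk)
# (0, 1, 2)
# (3, 4, 5)
# (6, 7, 8)
# (9,)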

Example 1: sb_filter

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def sb_filter(fastq, bc, cores, nedit):
    ''' Filters reads with non-matching sample barcodes
    Expects formatted fastq files.
    '''
    barcodes = set(sb.strip() for sb in bc)
    if nedit == 0:
        filter_sb = partial(exact_sample_filter2, barcodes=barcodes)
    else:
        barcodehash = MutationHash(barcodes, nedit)
        filter_sb = partial(correcting_sample_filter2, barcodehash=barcodehash)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_sb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read) 
Developer: vals, Project: umis, Lines of code: 20, Source: umis.py
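
The double call to partition_all in this example (and in Examples 4 to 6 below, which use the same pattern) first groups reads into chunks of 10,000 so each worker receives a sizeable unit of work, then groups those chunks into batches of cores so that every p.map call hands exactly one chunk to each worker. A self-contained sketch of the same idea, with a hypothetical process_chunk standing in for filter_sb and a plain range standing in for read_fastq:

import multiprocessing
import toolz as tz

def process_chunk(chunk):
    # stand-in for filter_sb: keep only the items that pass some test
    return [item for item in chunk if item % 2 == 0]

def chunked_parallel_filter(items, cores=4):
    p = multiprocessing.Pool(cores)
    chunks = tz.partition_all(10000, items)      # units of up to 10,000 items each
    bigchunks = tz.partition_all(cores, chunks)  # batches of one chunk per worker
    for bigchunk in bigchunks:
        for chunk in p.map(process_chunk, list(bigchunk)):
            for item in chunk:
                yield item
    p.close()

if __name__ == "__main__":
    kept = sum(1 for _ in chunked_parallel_filter(range(100000)))
    print(kept)  # 50000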

Example 2: _cache_accounts

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def _cache_accounts(cls, accounts, steem, trx=True):
        """Fetch all `accounts` and write to db."""
        timer = Timer(len(accounts), 'account', ['rps', 'wps'])
        for name_batch in partition_all(1000, accounts):
            cached_at = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

            timer.batch_start()
            batch = steem.get_accounts(name_batch)

            timer.batch_lap()
            sqls = [cls._sql(acct, cached_at) for acct in batch]
            DB.batch_queries(sqls, trx)

            timer.batch_finish(len(batch))
            if trx or len(accounts) > 1000:
                log.info(timer.batch_status()) 
Developer: steemit, Project: hivemind, Lines of code: 18, Source: accounts.py

Example 3: scrape_blockchain

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def scrape_blockchain(mongo):
    s = Steem()
    # see how far behind we are
    missing = list(range(last_block_num(mongo), s.last_irreversible_block_num))

    # if we are far behind blockchain head
    # split work in chunks of 100
    if len(missing) > 100:
        for batch in partition_all(100, missing):
            results = s.get_blocks(batch)
            insert_blocks(mongo, results)

    # otherwise continue as normal
    blockchain = Blockchain(mode="irreversible")
    hist = blockchain.stream_from(start_block=last_block_num(mongo), full_blocks=True)
    insert_blocks(mongo, hist) 
Developer: SteemData, Project: steemdata-mongo, Lines of code: 18, Source: scraper.py

Example 4: cb_filter

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def cb_filter(fastq, bc1, bc2, bc3, cores, nedit):
    ''' Filters reads with non-matching barcodes
    Expects formatted fastq files.
    '''
    with open_gzipsafe(bc1) as bc1_fh:
        bc1 = set(cb.strip() for cb in bc1_fh)

    if bc2:
        with open_gzipsafe(bc2) as bc2_fh:
            bc2 = set(cb.strip() for cb in bc2_fh)
    if bc3:
        with open_gzipsafe(bc3) as bc3_fh:
            bc3 = set(cb.strip() for cb in bc3_fh)

    annotations = detect_fastq_annotations(fastq)
    re_string = construct_transformed_regex(annotations)

    if nedit == 0:
        filter_cb = partial(exact_barcode_filter, bc1=bc1, bc2=bc2, bc3=bc3,
                            re_string=re_string)
    else:
        bc1hash = MutationHash(bc1, nedit)
        bc2hash = None
        bc3hash = None
        if bc2:
            bc2hash = MutationHash(bc2, nedit)
        if bc3:
            bc3hash = MutationHash(bc3, nedit)
        filter_cb = partial(correcting_barcode_filter, bc1hash=bc1hash,
                            bc2hash=bc2hash, bc3hash=bc3hash, re_string=re_string)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_cb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read) 
Developer: vals, Project: umis, Lines of code: 40, Source: umis.py

Example 5: mb_filter

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def mb_filter(fastq, cores):
    ''' Filters umis with non-ACGT bases
    Expects formatted fastq files.
    '''
    filter_mb = partial(umi_filter)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(filter_mb, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read) 
Developer: vals, Project: umis, Lines of code: 15, Source: umis.py

Example 6: add_uid

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def add_uid(fastq, cores):
    ''' Adds UID:[samplebc cellbc umi] to readname for umi-tools deduplication
    Expects formatted fastq files with correct sample and cell barcodes.
    '''

    uids = partial(append_uids)
    p = multiprocessing.Pool(cores)

    chunks = tz.partition_all(10000, read_fastq(fastq))
    bigchunks = tz.partition_all(cores, chunks)
    for bigchunk in bigchunks:
        for chunk in p.map(uids, list(bigchunk)):
            for read in chunk:
                sys.stdout.write(read) 
Developer: vals, Project: umis, Lines of code: 16, Source: umis.py

Example 7: send_topic_nodes

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def send_topic_nodes(
        self, node: kademlia.Node, echo: Hash32, nodes: Tuple[kademlia.Node, ...]
    ) -> None:
        encoded_nodes = tuple(
            n.address.to_endpoint() + [n.pubkey.to_bytes()] for n in nodes
        )
        max_neighbours = self._get_max_neighbours_per_packet()
        for batch in toolz.partition_all(max_neighbours, encoded_nodes):
            message = _pack_v5(CMD_TOPIC_NODES.id, (echo, batch), self.privkey)
            self.logger.trace(">>> topic_nodes to %s: %s", node, batch)
            self.send_v5(node, message) 
Developer: QuarkChain, Project: pyquarkchain, Lines of code: 13, Source: discovery.py

Example 8: split_inline

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def split_inline(data_dir, val_ratio, test_ratio, overwrite=False, exclude_files=None):
    """Splits the inline data into train, val and test.

    Args:
        data_dir (str): path to directory that holds the data
        val_ratio (float): the ratio of the partition that will be used for validation
        test_ratio (float): the ratio of the partition that will be used for testing
        exclude_files (list[str]): filenames to exclude from the dataset, such as ones that contain
            artifacts. Example: ['image1.tiff']
    """
    num_partitions = 5
    image_dir = os.path.join(data_dir, "inlines")
    dir_paths = (os.path.join(image_dir, ddir) for ddir in ("train", "val", "test"))
    locations_list = [_create_directory(d, overwrite=overwrite) for d in dir_paths]  # train, val, test

    images_iter = glob.iglob(os.path.join(image_dir, "*.tiff"))

    if exclude_files is not None:
        images_list = list(itertools.filterfalse(lambda x: x in exclude_files, images_iter))
    else:
        images_list = list(images_iter)

    num_elements = math.ceil(len(images_list) / num_partitions)
    for partition in partition_all(num_elements, images_list):  # Partition files into N partitions
        for files_list, dest_dir in zip(_split_train_val_test(partition, val_ratio, test_ratio), locations_list):
            _copy_files(files_list, dest_dir) 
Developer: microsoft, Project: seismic-deeplearning, Lines of code: 28, Source: prepare_penobscot.py
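
Note how this example turns a target number of partitions into a chunk size: partition_all takes a chunk length, not a partition count, so the chunk size is derived as ceil(len(images_list) / num_partitions), which yields at most num_partitions groups. A minimal sketch of that calculation with hypothetical file names:

import math
from toolz import partition_all

files = ["image{}.tiff".format(i) for i in range(11)]
num_partitions = 5

chunk_size = math.ceil(len(files) / num_partitions)  # ceil(11 / 5) == 3
partitions = list(partition_all(chunk_size, files))
print([len(p) for p in partitions])                  # [3, 3, 3, 2] -> 4 groups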

Example 9: from_checkpoints

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def from_checkpoints(self, chunk_size=1000):
        """Initial sync strategy: read from blocks on disk.

        This methods scans for files matching ./checkpoints/*.json.lst
        and uses them for hive's initial sync. Each line must contain
        exactly one block in JSON format.
        """
        # pylint: disable=no-self-use
        last_block = Blocks.head_num()

        tuplize = lambda path: [int(path.split('/')[-1].split('.')[0]), path]
        basedir = os.path.dirname(os.path.realpath(__file__ + "/../.."))
        files = glob.glob(basedir + "/checkpoints/*.json.lst")
        tuples = sorted(map(tuplize, files), key=lambda f: f[0])

        last_read = 0
        for (num, path) in tuples:
            if last_block < num:
                log.info("[SYNC] Load %s. Last block: %d", path, last_block)
                with open(path) as f:
                    # each line in file represents one block
                    # we can skip the blocks we already have
                    skip_lines = last_block - last_read
                    remaining = drop(skip_lines, f)
                    for lines in partition_all(chunk_size, remaining):
                        Blocks.process_multi(map(json.loads, lines), True)
                last_block = num
            last_read = num 
Developer: steemit, Project: hivemind, Lines of code: 30, Source: sync.py
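
The combination of toolz.drop and partition_all here is a tidy way to resume a line-oriented stream: skip the lines that were already processed, then consume the remainder in fixed-size batches. A small, self-contained sketch of the same pattern, with a hypothetical process_batch in place of Blocks.process_multi:

from toolz import drop, partition_all

def process_batch(batch):
    # placeholder for Blocks.process_multi: handle one batch of lines
    print("processing {} lines".format(len(batch)))

lines = ("line {}\n".format(i) for i in range(2500))  # stands in for an open file
already_done = 300                                    # lines handled in a previous run

remaining = drop(already_done, lines)
for batch in partition_all(1000, remaining):
    process_batch(batch)
# processing 1000 lines
# processing 1000 lines
# processing 200 lines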

Example 10: compute_date_range_chunks

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def compute_date_range_chunks(sessions, start_date, end_date, chunksize):
    """Compute the start and end dates to run a pipeline for.

    Parameters
    ----------
    sessions : DatetimeIndex
        The available dates.
    start_date : pd.Timestamp
        The first date in the pipeline.
    end_date : pd.Timestamp
        The last date in the pipeline.
    chunksize : int or None
        The size of the chunks to run. Setting this to None returns one chunk.

    Returns
    -------
    ranges : iterable[(np.datetime64, np.datetime64)]
        A sequence of start and end dates to run the pipeline for.
    """
    if start_date not in sessions:
        raise KeyError("Start date %s is not found in calendar." %
                       (start_date.strftime("%Y-%m-%d"),))
    if end_date not in sessions:
        raise KeyError("End date %s is not found in calendar." %
                       (end_date.strftime("%Y-%m-%d"),))
    if end_date < start_date:
        raise ValueError("End date %s cannot precede start date %s." %
                         (end_date.strftime("%Y-%m-%d"),
                          start_date.strftime("%Y-%m-%d")))

    if chunksize is None:
        return [(start_date, end_date)]

    start_ix, end_ix = sessions.slice_locs(start_date, end_date)
    return (
        (r[0], r[-1]) for r in partition_all(
            chunksize, sessions[start_ix:end_ix]
        )
    ) 
Developer: enigmampc, Project: catalyst, Lines of code: 41, Source: date_utils.py
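
The returned generator maps each chunk of sessions to a (start, end) pair by taking the chunk's first and last elements. A self-contained sketch of that final step on a plain pandas DatetimeIndex, outside the catalyst calendar machinery:

import pandas as pd
from toolz import partition_all

sessions = pd.date_range("2018-01-01", "2018-01-10", freq="D")

# each chunk of up to 4 sessions becomes a (first, last) date range
ranges = [(chunk[0], chunk[-1]) for chunk in partition_all(4, sessions)]
for start, end in ranges:
    print(start.date(), "->", end.date())
# 2018-01-01 -> 2018-01-04
# 2018-01-05 -> 2018-01-08
# 2018-01-09 -> 2018-01-10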

Example 11: optimize

# Required import: import toolz [as alias]
# Or: from toolz import partition_all [as alias]
def optimize(model, sampler, train, valid):
    """
    Optimize the model. TODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :return: None
    """
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    if model.feature_projection is not None:
        # initialize item embedding with feature projection
        sess.run(tf.assign(model.item_embeddings, model.feature_projection))

    # sample some users to calculate recall validation
    valid_users = numpy.random.choice(list(set(valid.nonzero()[0])), size=1000, replace=False)

    while True:
        # create evaluator on validation set
        validation_recall = RecallEvaluator(model, train, valid)
        # compute recall on the validation set
        valid_recalls = []

        # compute recall in chunks to utilize the speedup provided by TensorFlow
        for user_chunk in toolz.partition_all(100, valid_users):
            valid_recalls.extend([validation_recall.eval(sess, user_chunk)])
        print("\nRecall on (sampled) validation set: {}".format(numpy.mean(valid_recalls)))
        # TODO: early stopping based on validation recall

        # train model
        losses = []
        # run n mini-batches
        for _ in tqdm(range(EVALUATION_EVERY_N_BATCHES), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()
            _, loss = sess.run((model.optimize, model.loss),
                               {model.user_positive_items_pairs: user_pos,
                                model.negative_samples: neg})

            losses.append(loss)

        print("\nTraining loss {}".format(numpy.mean(losses))) 
Developer: changun, Project: CollMetric, Lines of code: 44, Source: CML.py


Note: The toolz.partition_all method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are drawn from open-source projects contributed by various developers; copyright of the source code remains with the original authors, and distribution and use should follow the license of the corresponding project. Do not reproduce without permission.