

Python SubreadSet.split Method Code Examples

This article collects typical usage examples of the pbcore.io.SubreadSet.split method in Python. If you are wondering how exactly to call SubreadSet.split, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of the class it belongs to, pbcore.io.SubreadSet.


The text below presents 11 code examples of the SubreadSet.split method, sorted by popularity by default.
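Before the collected snippets, here is a minimal orientation sketch of how SubreadSet.split is typically invoked, assembled only from the calls that appear in the examples below. The input and output file names are placeholders, and the keyword arguments shown (chunks, zmws, barcodes, maxChunks) are just the ones exercised by these tests; treat this as an illustration rather than a definitive reference.

```python
from pbcore.io import SubreadSet

# Placeholder input path; substitute a real .subreadset.xml file.
sset = SubreadSet("movie.subreadset.xml")

# Split into a fixed number of chunks while keeping reads from the same ZMW
# together (the chunks/zmws pattern used by most of the tests below).
chunks = sset.split(chunks=12, zmws=True)

# Alternatively, split by barcode with a cap on the number of output datasets
# (the barcodes/maxChunks pattern from Examples 3 and 5).
# chunks = sset.split(maxChunks=2, barcodes=True)

# Each chunk is itself a SubreadSet and can be written back out as XML.
for i, chunk in enumerate(chunks):
    chunk.write("chunk_{:03d}.subreadset.xml".format(i))
```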

Example 1: test_multi_movie_split_zmws_with_existing_movie_filter

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_multi_movie_split_zmws_with_existing_movie_filter(self):
        # TODO: test with three movies and two chunks
        N_RECORDS = 959539
        test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2372215/0007/Analysis_Results/m150404_101626_42"
                       "267_c100807920800000001823174110291514_s1_p0.al"
                       "l.subreadset.xml")
        test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2590980/0008/Analysis_Results/m141115_075238_et"
                       "han_c100699872550000001823139203261572_s1_p0.al"
                       "l.subreadset.xml")
        ds1 = SubreadSet(test_file_1, test_file_2)
        dss = ds1.split_movies(2)
        self.assertEqual(len(dss), 2)
        ds1 = dss[0]
        # used to get total:
        #self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
        self.assertEqual(len(ds1), N_RECORDS)
        dss = ds1.split(chunks=1, zmws=True)
        self.assertEqual(len(dss), 1)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)

        dss = ds1.split(chunks=12, zmws=True)
        self.assertEqual(len(dss), 12)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)
        for ds in dss:
            self.assertEqual(
                ds.zmwRanges[0][0],
                'm150404_101626_42267_c100807920800000001823174110291514_s1_p0')
Developer: PacificBiosciences, Project: pbcore, Lines: 33, Source: test_pbdataset_split.py

Example 2: test_multi_movie_split_zmws

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_multi_movie_split_zmws(self):
        N_RECORDS = 1745161
        test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2372215/0007/Analysis_Results/m150404_101626_42"
                       "267_c100807920800000001823174110291514_s1_p0.al"
                       "l.subreadset.xml")
        test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2590980/0008/Analysis_Results/m141115_075238_et"
                       "han_c100699872550000001823139203261572_s1_p0.al"
                       "l.subreadset.xml")
        ds1 = SubreadSet(test_file_1, test_file_2)
        # used to get total:
        #self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
        self.assertEqual(len(ds1), N_RECORDS)
        dss = ds1.split(chunks=1, zmws=True)
        self.assertEqual(len(dss), 1)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)

        dss = ds1.split(chunks=12, zmws=True)
        self.assertEqual(len(dss), 12)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)
        self.assertEqual(
            dss[0].zmwRanges,
            [('m150404_101626_42267_c100807920800000001823174110291514_s1_p0',
              7, 22099)])
        self.assertEqual(
            dss[-1].zmwRanges,
            [('m141115_075238_ethan_c100699872550000001823139203261572_s1_p0',
              127819, 163468)])
Developer: PacificBiosciences, Project: pbcore, Lines: 33, Source: test_pbdataset_split.py

Example 3: test_barcode_split_cornercases

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_barcode_split_cornercases(self):
        fn = ('/pbi/dept/secondary/siv/testdata/'
              'pblaa-unittest/Sequel/Phi29/m54008_160219_003234'
              '.tiny.subreadset.xml')
        sset = SubreadSet(fn)
        ssets = sset.split(chunks=3, barcodes=True)
        self.assertEqual([str(ss.filters) for ss in ssets],
                         ["( bc = [0, 0] )",
                          "( bc = [1, 1] )",
                          "( bc = [2, 2] )"])
        sset = SubreadSet(fn)
        self.assertEqual(len(sset), 15133)
        sset.filters = None
        self.assertEqual(str(sset.filters), "")
        sset.updateCounts()
        self.assertEqual(len(sset), 2667562)

        sset.filters.addRequirement(bc=[('=', '[2, 2]')])
        self.assertEqual(str(sset.filters), "( bc = [2, 2] )")
        sset.updateCounts()
        self.assertEqual(len(sset), 4710)

        sset.filters = None
        self.assertEqual(str(sset.filters), "")
        sset.updateCounts()
        self.assertEqual(len(sset), 2667562)

        sset.filters.addRequirement(bc=[('=', '[2,2]')])
        self.assertEqual(str(sset.filters), "( bc = [2,2] )")
        sset.updateCounts()
        self.assertEqual(len(sset), 4710)
Developer: vrainish-pacbio, Project: pbcore, Lines: 33, Source: test_pbdataset_subtypes.py

Example 4: run

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
def run(subreadset, fofn):
    dir_name = os.getcwd()
    maxChunks = 0
    dset = SubreadSet(subreadset, strict=True)
    fns = dset.toFofn()
    import pprint
    log.info('resources in {!r}:\n{}'.format(subreadset, pprint.pformat(fns)))
    nrecs = len(dset)
    # HG with 70x coverage => 200G bases total
    ts = 50000 # @ 20k/read => 1G bases, ~300MB .gz => ~200 chunks for Human
    ts = 500000 # @ 20k/read => 10G bases, ~3GB .gz => ~20 chunks for Human
    # and we expect about 7-10min per chunk.
    chunks = nrecs // ts
    log.info('num_chunks={:g} ({:g} / {:g})'.format(chunks, nrecs, ts))
    log.info('Splitting with dset.split(zmws=False, chunks={}, ignoreSubDatasets=True, maxChunks={},)'.format(
        chunks, maxChunks))
    dset_chunks = dset.split(zmws=False, chunks=chunks, ignoreSubDatasets=True, maxChunks=maxChunks,
            updateCounts=False,
            #targetSize=1, breakContigs=True
    )

    chunk_fns = []
    for i, dset in enumerate(dset_chunks):
        chunk_name = 'chunk_{:03d}.subreadset.xml'.format(i) # TODO: 02
        chunk_fn = os.path.join(dir_name, chunk_name)
        dset.updateCounts()
        dset.write(chunk_fn, validate=False) # , relPaths=True
        chunk_fns.append(chunk_fn)
    with open(fofn, 'w') as ofs:
        for fn in chunk_fns:
            ofs.write('{}\n'.format(fn))
    log.info('Wrote {} chunks into "{}"'.format(len(dset_chunks), fofn))
Developer: PacificBiosciences, Project: FALCON-polish, Lines: 34, Source: run_bam_scatter.py

Example 5: test_barcode_split_maxChunks

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_barcode_split_maxChunks(self):
        fn = ('/pbi/dept/secondary/siv/testdata/'
              'pblaa-unittest/Sequel/Phi29/m54008_160219_003234'
              '.tiny.subreadset.xml')
        sset = SubreadSet(fn, skipMissing=True)
        ssets = sset.split(maxChunks=2, barcodes=True)
        self.assertEqual([str(ss.filters) for ss in ssets],
                         ["( bc = [0, 0] )",
                          "( bc = [1, 1] ) OR ( bc = [2, 2] )"])
        sset = SubreadSet(fn, skipMissing=True)
        self.assertEqual(len(sset), 15133)
        sset.filters = None
        self.assertEqual(str(sset.filters), "")
        sset.updateCounts()
        self.assertEqual(len(sset), 2667562)


        sset.filters = ssets[0].filters
        self.assertEqual(str(sset.filters), "( bc = [0, 0] )")
        sset.updateCounts()
        self.assertEqual(len(sset), 5370)

        sset.filters = None
        self.assertEqual(str(sset.filters), "")
        sset.updateCounts()
        self.assertEqual(len(sset), 2667562)

        sset.filters = ssets[1].filters
        self.assertEqual(str(sset.filters),
                         "( bc = [1, 1] ) OR ( bc = [2, 2] )")
        sset.updateCounts()
        self.assertEqual(len(sset), 9763)
Developer: PacificBiosciences, Project: pbcore, Lines: 34, Source: test_pbdataset_split.py

Example 6: test_multi_movie_split_zmws_existing_filters

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_multi_movie_split_zmws_existing_filters(self):
        N_RECORDS = 1745161
        test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2372215/0007/Analysis_Results/m150404_101626_42"
                       "267_c100807920800000001823174110291514_s1_p0.al"
                       "l.subreadset.xml")
        test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2590980/0008/Analysis_Results/m141115_075238_et"
                       "han_c100699872550000001823139203261572_s1_p0.al"
                       "l.subreadset.xml")
        ds1 = SubreadSet(test_file_1, test_file_2)
        # used to get total:
        #self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
        self.assertEqual(len(ds1), N_RECORDS)
        ds1.filters.addRequirement(
            movie=[('=',
                'm150404_101626_42267_c100807920800000001823174110291514_s1_p0'),
                   ('=',
                'm141115_075238_ethan_c100699872550000001823139203261572_s1_p0')],
            zm=[('>', 10), ('>', 127900)])
        ds1.filters.mapRequirement(
            zm=[('<', 10000), ('<', 140000)])
        FILT_RECORDS = 117776
        self.assertEqual(len(ds1), FILT_RECORDS)
        ds1._index = None
        ds1.updateCounts()
        self.assertEqual(len(ds1), FILT_RECORDS)

        dss = ds1.split(chunks=1, zmws=True)

        self.assertEqual(len(dss), 1)
        self.assertEqual(len(dss[0]), FILT_RECORDS)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         FILT_RECORDS)

        dss = ds1.split(chunks=12, zmws=True)
        self.assertEqual(len(dss), 12)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         FILT_RECORDS)
        self.assertEqual(
            dss[0].zmwRanges,
            [('m150404_101626_42267_c100807920800000001823174110291514_s1_p0',
              11, 1515)])
        self.assertEqual(
            dss[-1].zmwRanges,
            [('m141115_075238_ethan_c100699872550000001823139203261572_s1_p0',
              137634, 139999)])
Developer: PacificBiosciences, Project: pbcore, Lines: 49, Source: test_pbdataset_split.py

Example 7: test_subreadset_split_metadata_element_name

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_subreadset_split_metadata_element_name(self):
        fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        log.debug(fn)
        sset = SubreadSet(data.getXml(10),
                          data.getXml(13))
        chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
        self.assertEqual(len(chunks), 2)
        chunks[0].write(fn)
Developer: PacificBiosciences, Project: pbcore, Lines: 10, Source: test_pbdataset_split.py

Example 8: test_subreadset_split_metadata_element_name

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_subreadset_split_metadata_element_name(self):
        fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        log.debug(fn)
        sset = SubreadSet("/pbi/dept/secondary/siv/testdata/"
                          "SA3-Sequel/phi29/315/3150101/"
                          "r54008_20160219_002905/1_A01/"
                          "m54008_160219_003234.subreadset.xml")
        chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
        chunks[0].write(fn)
Developer: pb-sliang, Project: pbcore, Lines: 11, Source: test_pbdataset_subtypes.py

Example 9: setUp

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def setUp(self):
        BAM_IN = pbcore.data.getUnalignedBam()
        ds = SubreadSet(BAM_IN, strict=True)
        chunks = ds.split(zmws=True, chunks=2, targetSize=2)
        assert len(chunks) == 2
        self.zmw_range = chunks[CHUNK_INDEX].zmwRanges[0][1:3]
        logging.info("zmwRanges[CHUNK_INDEX] = {r}".format(
            r=str(chunks[CHUNK_INDEX].zmwRanges)))
        logging.info("SubreadSet = {f}".format(f=self.INPUT_FILES[0]))
        chunks[CHUNK_INDEX].write(self.INPUT_FILES[0])
Developer: DNApower, Project: pbccs, Lines: 12, Source: test_tool_contract.py

Example 10: to_zmw_chunked_subreadset_files

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
def to_zmw_chunked_subreadset_files(subreadset_path, max_total_nchunks,
                                    chunk_key, dir_name, base_name, ext):
    """Identical to to_chunked_subreadset_files, but chunks subreads by
    ZMW ranges for input to pbccs."""
    dset = SubreadSet(subreadset_path, strict=True)
    dset_chunks = dset.split(chunks=max_total_nchunks, zmws=True)
    d = {}
    for i, dset in enumerate(dset_chunks):
        chunk_id = '_'.join([base_name, str(i)])
        chunk_name = '.'.join([chunk_id, ext])
        chunk_path = os.path.join(dir_name, chunk_name)
        dset.write(chunk_path)
        d[chunk_key] = os.path.abspath(chunk_path)
        c = PipelineChunk(chunk_id, **d)
        yield c
Developer: yqin22, Project: pbsmrtpipe, Lines: 17, Source: chunk_utils.py

Example 11: test_huge_zmw_split

# Required import: from pbcore.io import SubreadSet [as alias]
# Alternatively: from pbcore.io.SubreadSet import split [as alias]
    def test_huge_zmw_split(self):
        human = ('/pbi/dept/secondary/siv/testdata/SA3-DS/'
                 'human/JCV_85x_v030/jcv_85x_v030.subreadset.xml')
        sset = SubreadSet(human)
        ssets = sset.split(zmws=True, maxChunks=5)
Developer: PacificBiosciences, Project: pbcore, Lines: 7, Source: test_pbdataset_split.py


Note: The pbcore.io.SubreadSet.split examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective authors, and copyright in the source code remains with those authors; please consult each project's license before distributing or using the code. Do not reproduce this article without permission.