本文整理汇总了Python中pbcore.io.SubreadSet.split方法的典型用法代码示例。如果您正苦于以下问题:Python SubreadSet.split方法的具体用法?Python SubreadSet.split怎么用?Python SubreadSet.split使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pbcore.io.SubreadSet
的用法示例。
在下文中一共展示了SubreadSet.split方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_multi_movie_split_zmws_with_existing_movie_filter
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_multi_movie_split_zmws_with_existing_movie_filter(self):
    """After isolating a single movie via split_movies, ZMW-based
    splitting must conserve the record total and keep every chunk
    inside that movie."""
    # TODO: test with three movies and two chunks
    N_RECORDS = 959539
    path_a = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2372215/0007/Analysis_Results/m150404_101626_42"
              "267_c100807920800000001823174110291514_s1_p0.al"
              "l.subreadset.xml")
    path_b = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2590980/0008/Analysis_Results/m141115_075238_et"
              "han_c100699872550000001823139203261572_s1_p0.al"
              "l.subreadset.xml")
    combined = SubreadSet(path_a, path_b)
    per_movie = combined.split_movies(2)
    self.assertEqual(len(per_movie), 2)
    single_movie = per_movie[0]
    # N_RECORDS was originally obtained via: sum(1 for _ in single_movie)
    self.assertEqual(len(single_movie), N_RECORDS)
    for n_chunks in (1, 12):
        chunks = single_movie.split(chunks=n_chunks, zmws=True)
        self.assertEqual(len(chunks), n_chunks)
        self.assertEqual(sum(len(c) for c in chunks), N_RECORDS)
    # Every chunk of the 12-way split must reference only the first movie.
    expected_movie = (
        'm150404_101626_42267_c100807920800000001823174110291514_s1_p0')
    for chunk in chunks:
        self.assertEqual(chunk.zmwRanges[0][0], expected_movie)
示例2: test_multi_movie_split_zmws
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_multi_movie_split_zmws(self):
    """ZMW-based splitting of a two-movie SubreadSet must conserve the
    total record count and yield the expected first/last ZMW ranges."""
    N_RECORDS = 1745161
    path_a = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2372215/0007/Analysis_Results/m150404_101626_42"
              "267_c100807920800000001823174110291514_s1_p0.al"
              "l.subreadset.xml")
    path_b = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2590980/0008/Analysis_Results/m141115_075238_et"
              "han_c100699872550000001823139203261572_s1_p0.al"
              "l.subreadset.xml")
    merged = SubreadSet(path_a, path_b)
    # N_RECORDS was originally obtained via: sum(1 for _ in merged)
    self.assertEqual(len(merged), N_RECORDS)
    chunks = merged.split(chunks=1, zmws=True)
    self.assertEqual(len(chunks), 1)
    self.assertEqual(sum(len(c) for c in chunks), N_RECORDS)
    chunks = merged.split(chunks=12, zmws=True)
    self.assertEqual(len(chunks), 12)
    self.assertEqual(sum(len(c) for c in chunks), N_RECORDS)
    first_movie = ('m150404_101626_42267_c10080792080000000182317411029'
                   '1514_s1_p0')
    last_movie = ('m141115_075238_ethan_c10069987255000000182313920326'
                  '1572_s1_p0')
    self.assertEqual(chunks[0].zmwRanges, [(first_movie, 7, 22099)])
    self.assertEqual(chunks[-1].zmwRanges, [(last_movie, 127819, 163468)])
示例3: test_barcode_split_cornercases
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_barcode_split_cornercases(self):
    """Barcode-based splitting must produce one bc filter per chunk,
    and manually added bc filters (with or without a space after the
    comma) must select the same record counts."""
    fn = ('/pbi/dept/secondary/siv/testdata/'
          'pblaa-unittest/Sequel/Phi29/m54008_160219_003234'
          '.tiny.subreadset.xml')

    def clear_filters(ds):
        # Dropping all filters must expose every record in the file.
        ds.filters = None
        self.assertEqual(str(ds.filters), "")
        ds.updateCounts()
        self.assertEqual(len(ds), 2667562)

    subreads = SubreadSet(fn)
    split_sets = subreads.split(chunks=3, barcodes=True)
    self.assertEqual([str(ss.filters) for ss in split_sets],
                     ["( bc = [0, 0] )",
                      "( bc = [1, 1] )",
                      "( bc = [2, 2] )"])
    subreads = SubreadSet(fn)
    self.assertEqual(len(subreads), 15133)
    clear_filters(subreads)
    subreads.filters.addRequirement(bc=[('=', '[2, 2]')])
    self.assertEqual(str(subreads.filters), "( bc = [2, 2] )")
    subreads.updateCounts()
    self.assertEqual(len(subreads), 4710)
    clear_filters(subreads)
    # The same barcode pair without a space must select the same records.
    subreads.filters.addRequirement(bc=[('=', '[2,2]')])
    self.assertEqual(str(subreads.filters), "( bc = [2,2] )")
    subreads.updateCounts()
    self.assertEqual(len(subreads), 4710)
示例4: run
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def run(subreadset, fofn):
    """Split *subreadset* into record-count-based chunks, write each chunk
    to a .subreadset.xml in the current working directory, and write the
    list of chunk paths to *fofn* (a file-of-filenames).

    :param subreadset: path to the input SubreadSet XML
    :param fofn: output path for the newline-separated list of chunk files
    """
    dir_name = os.getcwd()
    max_chunks = 0  # 0 => do not cap the number of chunks
    dset = SubreadSet(subreadset, strict=True)
    fns = dset.toFofn()
    import pprint
    log.info('resources in {!r}:\n{}'.format(subreadset, pprint.pformat(fns)))
    nrecs = len(dset)
    # HG with 70x coverage => 200G bases total
    # ts = 50000  # @ 20k/read => 1G bases, ~300MB .gz => ~200 chunks for Human
    ts = 500000  # @ 20k/read => 10G bases, ~3GB .gz => ~20 chunks for Human
    # and we expect about 7-10min per chunk.
    # Guard against chunks == 0 when the input has fewer than `ts` records,
    # which would ask split() for zero chunks.
    chunks = max(1, nrecs // ts)
    log.info('num_chunks={:g} ({:g} / {:g})'.format(chunks, nrecs, ts))
    log.info('Splitting with dset.split(zmws=False, chunks={}, ignoreSubDatasets=True, maxChunks={},)'.format(
        chunks, max_chunks))
    dset_chunks = dset.split(zmws=False, chunks=chunks, ignoreSubDatasets=True,
                             maxChunks=max_chunks,
                             updateCounts=False,
                             #targetSize=1, breakContigs=True
                             )
    chunk_fns = []
    for i, chunk_ds in enumerate(dset_chunks):
        chunk_name = 'chunk_{:03d}.subreadset.xml'.format(i)  # TODO: 02
        chunk_fn = os.path.join(dir_name, chunk_name)
        chunk_ds.updateCounts()
        chunk_ds.write(chunk_fn, validate=False)  # , relPaths=True
        chunk_fns.append(chunk_fn)
    with open(fofn, 'w') as ofs:
        for fn in chunk_fns:
            ofs.write('{}\n'.format(fn))
    log.info('Wrote {} chunks into "{}"'.format(len(dset_chunks), fofn))
示例5: test_barcode_split_maxChunks
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_barcode_split_maxChunks(self):
    """Splitting by barcodes with maxChunks=2 must collapse the last
    two barcode filters into a single OR-ed chunk filter, and those
    filters must select the expected record counts when re-applied."""
    fn = ('/pbi/dept/secondary/siv/testdata/'
          'pblaa-unittest/Sequel/Phi29/m54008_160219_003234'
          '.tiny.subreadset.xml')
    subreads = SubreadSet(fn, skipMissing=True)
    split_sets = subreads.split(maxChunks=2, barcodes=True)
    self.assertEqual([str(ss.filters) for ss in split_sets],
                     ["( bc = [0, 0] )",
                      "( bc = [1, 1] ) OR ( bc = [2, 2] )"])
    subreads = SubreadSet(fn, skipMissing=True)
    self.assertEqual(len(subreads), 15133)

    def clear_filters():
        # Dropping all filters must expose every record in the file.
        subreads.filters = None
        self.assertEqual(str(subreads.filters), "")
        subreads.updateCounts()
        self.assertEqual(len(subreads), 2667562)

    clear_filters()
    subreads.filters = split_sets[0].filters
    self.assertEqual(str(subreads.filters), "( bc = [0, 0] )")
    subreads.updateCounts()
    self.assertEqual(len(subreads), 5370)
    clear_filters()
    subreads.filters = split_sets[1].filters
    self.assertEqual(str(subreads.filters),
                     "( bc = [1, 1] ) OR ( bc = [2, 2] )")
    subreads.updateCounts()
    self.assertEqual(len(subreads), 9763)
示例6: test_multi_movie_split_zmws_existing_filters
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_multi_movie_split_zmws_existing_filters(self):
    """ZMW-based splitting must respect filters already present on the
    dataset: chunk totals equal the filtered count and the boundary
    chunks stay inside the filtered ZMW windows."""
    N_RECORDS = 1745161
    FILT_RECORDS = 117776
    movie_a = 'm150404_101626_42267_c100807920800000001823174110291514_s1_p0'
    movie_b = 'm141115_075238_ethan_c100699872550000001823139203261572_s1_p0'
    path_a = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2372215/0007/Analysis_Results/m150404_101626_42"
              "267_c100807920800000001823174110291514_s1_p0.al"
              "l.subreadset.xml")
    path_b = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
              "2590980/0008/Analysis_Results/m141115_075238_et"
              "han_c100699872550000001823139203261572_s1_p0.al"
              "l.subreadset.xml")
    dataset = SubreadSet(path_a, path_b)
    # N_RECORDS was originally obtained via: sum(1 for _ in dataset)
    self.assertEqual(len(dataset), N_RECORDS)
    # Lower ZMW bounds, paired per movie.
    dataset.filters.addRequirement(
        movie=[('=', movie_a),
               ('=', movie_b)],
        zm=[('>', 10), ('>', 127900)])
    # Upper ZMW bounds mapped onto the existing requirements.
    dataset.filters.mapRequirement(
        zm=[('<', 10000), ('<', 140000)])
    self.assertEqual(len(dataset), FILT_RECORDS)
    # Drop the cached index and recount to prove the filtered total
    # survives a refresh.
    dataset._index = None
    dataset.updateCounts()
    self.assertEqual(len(dataset), FILT_RECORDS)
    chunks = dataset.split(chunks=1, zmws=True)
    self.assertEqual(len(chunks), 1)
    self.assertEqual(len(chunks[0]), FILT_RECORDS)
    self.assertEqual(sum(len(c) for c in chunks), FILT_RECORDS)
    chunks = dataset.split(chunks=12, zmws=True)
    self.assertEqual(len(chunks), 12)
    self.assertEqual(sum(len(c) for c in chunks), FILT_RECORDS)
    self.assertEqual(chunks[0].zmwRanges, [(movie_a, 11, 1515)])
    self.assertEqual(chunks[-1].zmwRanges, [(movie_b, 137634, 139999)])
示例7: test_subreadset_split_metadata_element_name
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_subreadset_split_metadata_element_name(self):
    """A chunk produced by split() must serialize back to XML without
    error (metadata element names survive the round trip)."""
    out_fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.debug(out_fn)
    sset = SubreadSet(data.getXml(10), data.getXml(13))
    chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
    # Only 2 chunks come back despite chunks=5 — presumably limited by
    # the number of underlying resources; confirm against split() docs.
    self.assertEqual(len(chunks), 2)
    chunks[0].write(out_fn)
示例8: test_subreadset_split_metadata_element_name
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_subreadset_split_metadata_element_name(self):
    """A chunk of a Sequel SubreadSet must write out as XML cleanly."""
    out_fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.debug(out_fn)
    src = ("/pbi/dept/secondary/siv/testdata/"
           "SA3-Sequel/phi29/315/3150101/"
           "r54008_20160219_002905/1_A01/"
           "m54008_160219_003234.subreadset.xml")
    chunks = SubreadSet(src).split(chunks=5, zmws=False,
                                   ignoreSubDatasets=True)
    chunks[0].write(out_fn)
示例9: setUp
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def setUp(self):
    """Split the stock unaligned BAM into two ZMW chunks and write the
    chunk selected by CHUNK_INDEX as this test's input file, recording
    its (start, end) ZMW range on the instance."""
    bam_in = pbcore.data.getUnalignedBam()
    dataset = SubreadSet(bam_in, strict=True)
    chunks = dataset.split(zmws=True, chunks=2, targetSize=2)
    assert len(chunks) == 2
    selected = chunks[CHUNK_INDEX]
    # zmwRanges entries look like (movie, start, end); keep (start, end).
    self.zmw_range = selected.zmwRanges[0][1:3]
    logging.info("zmwRanges[CHUNK_INDEX] = {r}".format(
        r=str(selected.zmwRanges)))
    logging.info("SubreadSet = {f}".format(f=self.INPUT_FILES[0]))
    selected.write(self.INPUT_FILES[0])
示例10: to_zmw_chunked_subreadset_files
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def to_zmw_chunked_subreadset_files(subreadset_path, max_total_nchunks,
                                    chunk_key, dir_name, base_name, ext):
    """Identical to to_chunked_subreadset_files, but chunks subreads by
    ZMW ranges for input to pbccs.

    Writes each chunk as ``<dir_name>/<base_name>_<i>.<ext>`` and yields
    one PipelineChunk per chunk, with *chunk_key* mapped to the chunk
    file's absolute path.
    """
    source = SubreadSet(subreadset_path, strict=True)
    for idx, chunk in enumerate(source.split(chunks=max_total_nchunks,
                                             zmws=True)):
        chunk_id = '{}_{}'.format(base_name, idx)
        chunk_path = os.path.join(dir_name, '{}.{}'.format(chunk_id, ext))
        chunk.write(chunk_path)
        yield PipelineChunk(chunk_id,
                            **{chunk_key: os.path.abspath(chunk_path)})
示例11: test_huge_zmw_split
# 需要导入模块: from pbcore.io import SubreadSet [as 别名]
# 或者: from pbcore.io.SubreadSet import split [as 别名]
def test_huge_zmw_split(self):
human = ('/pbi/dept/secondary/siv/testdata/SA3-DS/'
'human/JCV_85x_v030/jcv_85x_v030.subreadset.xml')
sset = SubreadSet(human)
ssets = sset.split(zmws=True, maxChunks=5)