This article collects typical usage examples of the SubreadSet class from Python's pbcore.io module. If you are unsure what SubreadSet does or how to use it, the curated examples below should help.
The following 15 code examples of the SubreadSet class are shown, ordered by popularity.
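Before the examples, here is a minimal orientation sketch: it opens a dataset, checks its size, and writes it back out as XML. The file name movie.subreads.bam is a hypothetical placeholder; any subreads BAM (with its .pbi index) or subreadset.xml will do.

from pbcore.io import SubreadSet

# Hypothetical input; substitute a real subreads BAM or dataset XML.
ds = SubreadSet("movie.subreads.bam")
print(len(ds))         # number of subread records
print(ds.totalLength)  # total bases across all records
ds.write("movie.subreadset.xml")  # persist the dataset as XML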
Example 1: setUpClass
@classmethod
def setUpClass(cls):
    super(TestToolContract, cls).setUpClass()
    ds = SubreadSet(BAM_FILE, strict=True)
    ds.write(cls.INPUT_FILES[0])
    with FastaWriter(cls.INPUT_FILES[1]) as fa_out:
        for i in range(1010):
            fa_out.writeRecord("%04d_Forward" % i, "A" * 16)
Example 2: test_multi_movie_split_zmws_with_existing_movie_filter
def test_multi_movie_split_zmws_with_existing_movie_filter(self):
    # TODO: test with three movies and two chunks
    N_RECORDS = 959539
    test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2372215/0007/Analysis_Results/m150404_101626_42"
                   "267_c100807920800000001823174110291514_s1_p0.al"
                   "l.subreadset.xml")
    test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2590980/0008/Analysis_Results/m141115_075238_et"
                   "han_c100699872550000001823139203261572_s1_p0.al"
                   "l.subreadset.xml")
    ds1 = SubreadSet(test_file_1, test_file_2)
    dss = ds1.split_movies(2)
    self.assertEqual(len(dss), 2)
    ds1 = dss[0]
    # used to get total:
    # self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
    self.assertEqual(len(ds1), N_RECORDS)
    dss = ds1.split(chunks=1, zmws=True)
    self.assertEqual(len(dss), 1)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    dss = ds1.split(chunks=12, zmws=True)
    self.assertEqual(len(dss), 12)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    for ds in dss:
        self.assertEqual(
            ds.zmwRanges[0][0],
            'm150404_101626_42267_c100807920800000001823174110291514_s1_p0')
Example 3: test_multi_movie_split_zmws
def test_multi_movie_split_zmws(self):
    N_RECORDS = 1745161
    test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2372215/0007/Analysis_Results/m150404_101626_42"
                   "267_c100807920800000001823174110291514_s1_p0.al"
                   "l.subreadset.xml")
    test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                   "2590980/0008/Analysis_Results/m141115_075238_et"
                   "han_c100699872550000001823139203261572_s1_p0.al"
                   "l.subreadset.xml")
    ds1 = SubreadSet(test_file_1, test_file_2)
    # used to get total:
    # self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
    self.assertEqual(len(ds1), N_RECORDS)
    dss = ds1.split(chunks=1, zmws=True)
    self.assertEqual(len(dss), 1)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    dss = ds1.split(chunks=12, zmws=True)
    self.assertEqual(len(dss), 12)
    self.assertEqual(sum([len(ds_) for ds_ in dss]), N_RECORDS)
    self.assertEqual(
        dss[0].zmwRanges,
        [('m150404_101626_42267_c100807920800000001823174110291514_s1_p0',
          7, 22099)])
    self.assertEqual(
        dss[-1].zmwRanges,
        [('m141115_075238_ethan_c100699872550000001823139203261572_s1_p0',
          127819, 163468)])
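A common follow-on to splitting by ZMW, sketched below assuming a dataset at the hypothetical path input.subreadset.xml, is to write each chunk to its own XML for parallel downstream processing:

from pbcore.io import SubreadSet

ds = SubreadSet("input.subreadset.xml")  # hypothetical path
for i, chunk in enumerate(ds.split(chunks=12, zmws=True)):
    # Each chunk is itself a SubreadSet covering a disjoint ZMW range.
    chunk.write("chunk.%d.subreadset.xml" % i)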
Example 4: run_bax_to_bam
def run_bax_to_bam(input_file_name, output_file_name):
    with HdfSubreadSet(input_file_name) as ds_in:
        movies = set()
        for rr in ds_in.resourceReaders():
            movies.add(rr.movieName)
        if len(movies) > 1:
            out_dir = os.path.dirname(output_file_name)
            ds_out_files = []
            for bax_file in ds_in.toExternalFiles():
                output_file_name_tmp = os.path.join(out_dir, ".".join(
                    os.path.basename(bax_file).split(".")[:-2]) +
                    ".hdfsubreadset.xml")
                rc = _run_bax_to_bam(bax_file, output_file_name_tmp)
                if rc != 0:
                    log.error("bax2bam failed")
                    return rc
                ds_out_files.append(output_file_name_tmp)
            ds = SubreadSet(*ds_out_files)
            ds.name = ds_in.name
            if 'Description' in ds_in.objMetadata:
                ds.objMetadata['Description'] = ds_in.objMetadata['Description']
                ds.metadata.merge(ds_in.metadata)
            ds.write(output_file_name)
        else:
            return _run_bax_to_bam(input_file_name, output_file_name)
    return 0
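Calling the converter is a one-liner; this sketch assumes an RS II hdfsubreadset.xml as input and that the bax2bam binary wrapped by _run_bax_to_bam is available (both paths are placeholders):

rc = run_bax_to_bam("input.hdfsubreadset.xml", "output.subreadset.xml")
if rc != 0:
    raise RuntimeError("bax2bam conversion failed with exit code %d" % rc)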
Example 5: test_de_novo
def test_de_novo(self):
    ofn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.info(ofn)
    ss = SubreadSet(data.getXml(10))
    col = CollectionMetadata()
    self.assertFalse(ss.metadata.collections)
    ss.metadata.collections.append(col)
    self.assertTrue(ss.metadata.collections)
    # assertEqual (not assertTrue, whose second argument is only a failure
    # message) so the expected values are actually checked:
    col.cellIndex = 1
    self.assertEqual(ss.metadata.collections[0].cellIndex, 1)
    col.instrumentName = "foo"
    self.assertEqual(ss.metadata.collections[0].instrumentName, "foo")
    col.context = 'bar'
    self.assertEqual(ss.metadata.collections[0].context, 'bar')
    ss.metadata.collections[0].runDetails.name = 'foo'
    self.assertEqual('foo', ss.metadata.collections[0].runDetails.name)
    ss.metadata.collections[0].wellSample.name = 'bar'
    self.assertEqual('bar', ss.metadata.collections[0].wellSample.name)
    ss.metadata.collections[0].wellSample.wellName = 'baz'
    self.assertEqual('baz', ss.metadata.collections[0].wellSample.wellName)
    ss.metadata.collections[0].wellSample.concentration = 'baz'
    self.assertEqual('baz',
                     ss.metadata.collections[0].wellSample.concentration)
    ss.write(ofn, validate=False)
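Because the file is written with validate=False, a natural sanity check (a sketch reusing the names from the example above) is to read it back and confirm that the hand-built collection metadata survived the round trip:

ss2 = SubreadSet(ofn)
# The de novo CollectionMetadata should be preserved through XML I/O.
assert ss2.metadata.collections[0].instrumentName == "foo"
assert ss2.metadata.collections[0].wellSample.wellName == "baz"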
Example 6: test_subreadset_from_bam
def test_subreadset_from_bam(self):
    # DONE control experiment for bug 28698
    bam = upstreamData.getUnalignedBam()
    ds1 = SubreadSet(bam, strict=False)
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.debug(fn)
    ds1.write(fn)
Example 7: get_sequencing_chemistry
def get_sequencing_chemistry(entry_points, include_system_type=True):
    """
    Given a list of entry points (eid, path), extract the sequencing chemistry
    (and optionally system name) as a human-readable string.
    """
    chemistries = set()
    is_sequel = is_rsii = False
    for eid, path in entry_points:
        if eid == "eid_subread" and op.isfile(path):
            ds = SubreadSet(path)
            for bam in ds.resourceReaders():
                for rg in bam.readGroupTable:
                    chemistries.add(rg.SequencingChemistry)
                    if rg.SequencingChemistry.startswith("S"):
                        is_sequel = True
                    else:
                        is_rsii = True
    if len(chemistries) == 0:
        return "NA"
    chemistry_str = "; ".join(sorted(list(chemistries)))
    if include_system_type:
        fmt = "{s} ({c})"
        if is_sequel and is_rsii:
            return fmt.format(s="Mixed", c=chemistry_str)
        elif is_sequel:
            return fmt.format(s="Sequel", c=chemistry_str)
        elif is_rsii:
            return fmt.format(s="RSII", c=chemistry_str)
        else:
            raise ValueError("Can't determine system type for {c}".format(
                c=chemistry_str))
    return chemistry_str
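A minimal invocation might look like the following; the entry-point ID comes from the function's own check for "eid_subread", while the dataset path is a placeholder:

entry_points = [("eid_subread", "movie.subreadset.xml")]  # hypothetical path
print(get_sequencing_chemistry(entry_points))
# e.g. "Sequel (S/P1-C1/beta)"; the exact string depends on the data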
Example 8: test_len
def test_len(self):
    # AlignmentSet
    aln = AlignmentSet(data.getXml(8), strict=True)
    self.assertEqual(len(aln), 92)
    self.assertEqual(aln._length, (92, 123588))
    self.assertEqual(aln.totalLength, 123588)
    self.assertEqual(aln.numRecords, 92)
    aln.totalLength = -1
    aln.numRecords = -1
    self.assertEqual(aln.totalLength, -1)
    self.assertEqual(aln.numRecords, -1)
    aln.updateCounts()
    self.assertEqual(aln.totalLength, 123588)
    self.assertEqual(aln.numRecords, 92)
    self.assertEqual(sum(1 for _ in aln), 92)
    self.assertEqual(sum(len(rec) for rec in aln), 123588)

    # AlignmentSet with filters
    aln = AlignmentSet(data.getXml(15), strict=True)
    self.assertEqual(len(aln), 40)
    self.assertEqual(aln._length, (40, 52023))
    self.assertEqual(aln.totalLength, 52023)
    self.assertEqual(aln.numRecords, 40)
    aln.totalLength = -1
    aln.numRecords = -1
    self.assertEqual(aln.totalLength, -1)
    self.assertEqual(aln.numRecords, -1)
    aln.updateCounts()
    self.assertEqual(aln.totalLength, 52023)
    self.assertEqual(aln.numRecords, 40)

    # SubreadSet
    sset = SubreadSet(data.getXml(10), strict=True)
    self.assertEqual(len(sset), 92)
    self.assertEqual(sset._length, (92, 124093))
    self.assertEqual(sset.totalLength, 124093)
    self.assertEqual(sset.numRecords, 92)
    sset.totalLength = -1
    sset.numRecords = -1
    self.assertEqual(sset.totalLength, -1)
    self.assertEqual(sset.numRecords, -1)
    sset.updateCounts()
    self.assertEqual(sset.totalLength, 124093)
    self.assertEqual(sset.numRecords, 92)
    self.assertEqual(sum(1 for _ in sset), 92)
    self.assertEqual(sum(len(rec) for rec in sset), 124093)

    # ReferenceSet
    sset = ReferenceSet(data.getXml(9), strict=True)
    self.assertEqual(len(sset), 59)
    self.assertEqual(sset.totalLength, 85774)
    self.assertEqual(sset.numRecords, 59)
    sset.totalLength = -1
    sset.numRecords = -1
    self.assertEqual(sset.totalLength, -1)
    self.assertEqual(sset.numRecords, -1)
    sset.updateCounts()
    self.assertEqual(sset.totalLength, 85774)
    self.assertEqual(sset.numRecords, 59)
Example 9: test_subreadset_split_metadata_element_name
def test_subreadset_split_metadata_element_name(self):
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.debug(fn)
    sset = SubreadSet(data.getXml(10), data.getXml(13))
    chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
    self.assertEqual(len(chunks), 2)
    chunks[0].write(fn)
Example 10: test_subreadset_split_metadata_element_name
def test_subreadset_split_metadata_element_name(self):
    fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    log.debug(fn)
    sset = SubreadSet("/pbi/dept/secondary/siv/testdata/"
                      "SA3-Sequel/phi29/315/3150101/"
                      "r54008_20160219_002905/1_A01/"
                      "m54008_160219_003234.subreadset.xml")
    chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
    chunks[0].write(fn)
Example 11: setUpClass
@classmethod
def setUpClass(cls):
    tmp_bam = tempfile.NamedTemporaryFile(suffix=".subreads.bam").name
    shutil.copyfile(pbcore.data.getUnalignedBam(), tmp_bam)
    shutil.copyfile(pbcore.data.getUnalignedBam() + ".pbi", tmp_bam + ".pbi")
    ds = SubreadSet(tmp_bam, pbcore.data.getUnalignedBam(), strict=True)
    ds.write(cls.INPUT_FILES[0])
    _write_fasta_or_contigset(cls.INPUT_FILES[1], make_faidx=True,
                              ds_class=BarcodeSet)
    super(TestScatterSubreadBAMs, cls).setUpClass()
Example 12: _make_dataset
def _make_dataset(file_name=None, barcodes=None):
    if file_name is None:
        file_name = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    ds = SubreadSet(BAM_FILE, strict=True)
    if barcodes is not None:
        for er in ds.externalResources:
            er.barcodes = barcodes
    ds.write(file_name)
    return file_name
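As a fixture the helper can be used with or without barcodes; the barcode set path below is a placeholder:

plain_xml = _make_dataset()
barcoded_xml = _make_dataset(barcodes="barcodes.barcodeset.xml")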
Example 13: test_provenance_record_ordering
def test_provenance_record_ordering(self):
    import pbtestdata
    ds = SubreadSet(pbtestdata.get_file("subreads-sequel"), strict=True)
    ds.metadata.addParentDataSet(uuid.uuid4(), ds.datasetType,
                                 createdBy="AnalysisJob", timeStampedName="")
    tmp_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    ds.write(tmp_out)
    ds = SubreadSet(tmp_out, strict=True)
    tags = [r['tag'] for r in ds.metadata.record['children']]
    self.assertEqual(tags, ['TotalLength', 'NumRecords', 'Provenance',
                            'Collections', 'SummaryStats'])
Example 14: to_report
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int
    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")
    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)
    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))
    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper))
    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
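Note that to_report depends on module-level meta_rpt and log objects defined elsewhere in its module. Assuming those are in scope, a call might look like this (both paths are placeholders):

report = to_report("m54008_160219_003234.sts.xml", "/tmp/report_plots")
print(report.to_dict())  # inspect the assembled report structure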
Example 15: test_get_dataset_uuid
def test_get_dataset_uuid(self):
    ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
    ds_file = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    ds.write(ds_file)
    uuid = getDataSetUuid(ds_file)
    self.assertEqual(uuid, ds.uuid)
    with open(ds_file, "w") as out:
        out.write("hello world!")
    uuid = getDataSetUuid(ds_file)
    self.assertEqual(uuid, None)