Python io.SubreadSet Class Code Examples

This article collects typical usage examples of the pbcore.io.SubreadSet class in Python. If you are wondering what the SubreadSet class does, how to use it, or what real code that uses it looks like, the curated examples below should help.


Fifteen code examples of the SubreadSet class are shown below, sorted by popularity by default. You can upvote the examples you find useful; your ratings help the system recommend better Python code examples.
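Before looking at the individual examples, here is a minimal sketch of the pattern most of them share: open a SubreadSet from a BAM or .subreadset.xml file, inspect its record counts, split it into chunks, and write the result back out as XML. The input path is a placeholder rather than a real dataset; the sketch only uses calls that appear in the examples below (the SubreadSet constructor, len(), totalLength, split(chunks=..., zmws=...), and write()).

from pbcore.io import SubreadSet

# Placeholder input; substitute a real .subreads.bam or .subreadset.xml path.
ds = SubreadSet("movie.subreadset.xml", strict=True)

# Record count and total base count come from the dataset's indices.
print(len(ds), ds.totalLength)

# Split into four chunks, keeping all subreads from one ZMW in the same chunk,
# then write each chunk out as its own dataset XML.
chunks = ds.split(chunks=4, zmws=True)
for i, chunk in enumerate(chunks):
    chunk.write("chunk.%d.subreadset.xml" % i)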

Example 1: setUpClass

 def setUpClass(cls):
     super(TestToolContract, cls).setUpClass()
     ds = SubreadSet(BAM_FILE, strict=True)
     ds.write(cls.INPUT_FILES[0])
     with FastaWriter(cls.INPUT_FILES[1]) as fa_out:
         for i in range(1010):
             fa_out.writeRecord("%04d_Forward" % i, "A" * 16)
Developer: tkerelska, Project: pbreports, Lines: 7, Source file: test_pbreports_report_barcode.py

Example 2: test_multi_movie_split_zmws_with_existing_movie_filter

    def test_multi_movie_split_zmws_with_existing_movie_filter(self):
        # TODO: test with three movies and two chunks
        N_RECORDS = 959539
        test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2372215/0007/Analysis_Results/m150404_101626_42"
                       "267_c100807920800000001823174110291514_s1_p0.al"
                       "l.subreadset.xml")
        test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2590980/0008/Analysis_Results/m141115_075238_et"
                       "han_c100699872550000001823139203261572_s1_p0.al"
                       "l.subreadset.xml")
        ds1 = SubreadSet(test_file_1, test_file_2)
        dss = ds1.split_movies(2)
        self.assertEqual(len(dss), 2)
        ds1 = dss[0]
        # used to get total:
        #self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
        self.assertEqual(len(ds1), N_RECORDS)
        dss = ds1.split(chunks=1, zmws=True)
        self.assertEqual(len(dss), 1)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)

        dss = ds1.split(chunks=12, zmws=True)
        self.assertEqual(len(dss), 12)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)
        for ds in dss:
            self.assertEqual(
                ds.zmwRanges[0][0],
                'm150404_101626_42267_c100807920800000001823174110291514_s1_p0')
Developer: PacificBiosciences, Project: pbcore, Lines: 31, Source file: test_pbdataset_split.py

Example 3: test_multi_movie_split_zmws

    def test_multi_movie_split_zmws(self):
        N_RECORDS = 1745161
        test_file_1 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2372215/0007/Analysis_Results/m150404_101626_42"
                       "267_c100807920800000001823174110291514_s1_p0.al"
                       "l.subreadset.xml")
        test_file_2 = ("/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/"
                       "2590980/0008/Analysis_Results/m141115_075238_et"
                       "han_c100699872550000001823139203261572_s1_p0.al"
                       "l.subreadset.xml")
        ds1 = SubreadSet(test_file_1, test_file_2)
        # used to get total:
        #self.assertEqual(sum(1 for _ in ds1), N_RECORDS)
        self.assertEqual(len(ds1), N_RECORDS)
        dss = ds1.split(chunks=1, zmws=True)
        self.assertEqual(len(dss), 1)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)

        dss = ds1.split(chunks=12, zmws=True)
        self.assertEqual(len(dss), 12)
        self.assertEqual(sum([len(ds_) for ds_ in dss]),
                         N_RECORDS)
        self.assertEqual(
            dss[0].zmwRanges,
            [('m150404_101626_42267_c100807920800000001823174110291514_s1_p0',
              7, 22099)])
        self.assertEqual(
            dss[-1].zmwRanges,
            [('m141115_075238_ethan_c100699872550000001823139203261572_s1_p0',
              127819, 163468)])
Developer: PacificBiosciences, Project: pbcore, Lines: 31, Source file: test_pbdataset_split.py

Example 4: run_bax_to_bam

def run_bax_to_bam(input_file_name, output_file_name):
    with HdfSubreadSet(input_file_name) as ds_in:
        movies = set()
        for rr in ds_in.resourceReaders():
            movies.add(rr.movieName)
        if len(movies) > 1:
            out_dir = os.path.dirname(output_file_name)
            ds_out_files = []
            for bax_file in ds_in.toExternalFiles():
                output_file_name_tmp = os.path.join(out_dir, ".".join(
                    os.path.basename(bax_file).split(".")[:-2]) +
                    ".hdfsubreadset.xml")
                rc = _run_bax_to_bam(bax_file, output_file_name_tmp)
                if rc != 0:
                    log.error("bax2bam failed")
                    return rc
                ds_out_files.append(output_file_name_tmp)
            ds = SubreadSet(*ds_out_files)
            ds.name = ds_in.name
            if 'Description' in ds_in.objMetadata:
                ds.objMetadata['Description'] = ds_in.objMetadata['Description']
                ds.metadata.merge(ds_in.metadata)
            ds.write(output_file_name)
        else:
            return _run_bax_to_bam(input_file_name, output_file_name)
    return 0
Developer: natechols, Project: pbcoretools, Lines: 26, Source file: converters.py

Example 5: test_de_novo

    def test_de_novo(self):
        ofn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        log.info(ofn)
        ss = SubreadSet(data.getXml(10))
        col = CollectionMetadata()
        self.assertFalse(ss.metadata.collections)

        ss.metadata.collections.append(col)
        self.assertTrue(ss.metadata.collections)

        col.cellIndex = 1
        self.assertEqual(ss.metadata.collections[0].cellIndex, 1)

        col.instrumentName = "foo"
        self.assertEqual(ss.metadata.collections[0].instrumentName, "foo")

        col.context = 'bar'
        self.assertEqual(ss.metadata.collections[0].context, "bar")

        ss.metadata.collections[0].runDetails.name = 'foo'
        self.assertEqual('foo', ss.metadata.collections[0].runDetails.name)

        ss.metadata.collections[0].wellSample.name = 'bar'
        self.assertEqual('bar', ss.metadata.collections[0].wellSample.name)

        ss.metadata.collections[0].wellSample.wellName = 'baz'
        self.assertEqual('baz', ss.metadata.collections[0].wellSample.wellName)

        ss.metadata.collections[0].wellSample.concentration = 'baz'
        self.assertEqual('baz',
                         ss.metadata.collections[0].wellSample.concentration)
        ss.write(ofn, validate=False)
Developer: pb-sliang, Project: pbcore, Lines: 32, Source file: test_pbdataset_metadata.py

Example 6: test_subreadset_from_bam

 def test_subreadset_from_bam(self):
     # DONE control experiment for bug 28698
     bam = upstreamData.getUnalignedBam()
     ds1 = SubreadSet(bam, strict=False)
     fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     log.debug(fn)
     ds1.write(fn)
Developer: vrainish-pacbio, Project: pbcore, Lines: 7, Source file: test_pbdataset_subtypes.py

Example 7: get_sequencing_chemistry

def get_sequencing_chemistry(entry_points, include_system_type=True):
    """
    Given a list of entry points (eid, path), extract the sequencing chemistry
    (and optionally system name) as a human-readable string.
    """
    chemistries = set()
    is_sequel = is_rsii = False
    for eid, path in entry_points:
        if eid == "eid_subread" and op.isfile(path):
            ds = SubreadSet(path)
            for bam in ds.resourceReaders():
                for rg in bam.readGroupTable:
                    chemistries.add(rg.SequencingChemistry)
                    if rg.SequencingChemistry.startswith("S"):
                        is_sequel = True
                    else:
                        is_rsii = True
    if len(chemistries) == 0:
        return "NA"
    chemistry_str = "; ".join(sorted(list(chemistries)))
    if include_system_type:
        fmt = "{s} ({c})"
        if is_sequel and is_rsii:
            return fmt.format(s="Mixed", c=chemistry_str)
        elif is_sequel:
            return fmt.format(s="Sequel", c=chemistry_str)
        elif is_rsii:
            return fmt.format(s="RSII", c=chemistry_str)
        else:
            raise ValueError("Can't determine system type for {c}".format(
                             c=chemistry_str))
    return chemistry_str
Developer: lpp1985, Project: lpp_Script, Lines: 32, Source file: harvesting.py
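For orientation, a hypothetical invocation of get_sequencing_chemistry from Example 7; the dataset path is a placeholder, and "eid_subread" is the entry-point ID the function itself checks for:

entry_points = [("eid_subread", "/path/to/movie.subreadset.xml")]
print(get_sequencing_chemistry(entry_points))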

Example 8: test_len

    def test_len(self):
        # AlignmentSet
        aln = AlignmentSet(data.getXml(8), strict=True)
        self.assertEqual(len(aln), 92)
        self.assertEqual(aln._length, (92, 123588))
        self.assertEqual(aln.totalLength, 123588)
        self.assertEqual(aln.numRecords, 92)
        aln.totalLength = -1
        aln.numRecords = -1
        self.assertEqual(aln.totalLength, -1)
        self.assertEqual(aln.numRecords, -1)
        aln.updateCounts()
        self.assertEqual(aln.totalLength, 123588)
        self.assertEqual(aln.numRecords, 92)
        self.assertEqual(sum(1 for _ in aln), 92)
        self.assertEqual(sum(len(rec) for rec in aln), 123588)

        # AlignmentSet with filters
        aln = AlignmentSet(data.getXml(15), strict=True)
        self.assertEqual(len(aln), 40)
        self.assertEqual(aln._length, (40, 52023))
        self.assertEqual(aln.totalLength, 52023)
        self.assertEqual(aln.numRecords, 40)
        aln.totalLength = -1
        aln.numRecords = -1
        self.assertEqual(aln.totalLength, -1)
        self.assertEqual(aln.numRecords, -1)
        aln.updateCounts()
        self.assertEqual(aln.totalLength, 52023)
        self.assertEqual(aln.numRecords, 40)

        # SubreadSet
        sset = SubreadSet(data.getXml(10), strict=True)
        self.assertEqual(len(sset), 92)
        self.assertEqual(sset._length, (92, 124093))
        self.assertEqual(sset.totalLength, 124093)
        self.assertEqual(sset.numRecords, 92)
        sset.totalLength = -1
        sset.numRecords = -1
        self.assertEqual(sset.totalLength, -1)
        self.assertEqual(sset.numRecords, -1)
        sset.updateCounts()
        self.assertEqual(sset.totalLength, 124093)
        self.assertEqual(sset.numRecords, 92)
        self.assertEqual(sum(1 for _ in sset), 92)
        self.assertEqual(sum(len(rec) for rec in sset), 124093)

        # ReferenceSet
        sset = ReferenceSet(data.getXml(9), strict=True)
        self.assertEqual(len(sset), 59)
        self.assertEqual(sset.totalLength, 85774)
        self.assertEqual(sset.numRecords, 59)
        sset.totalLength = -1
        sset.numRecords = -1
        self.assertEqual(sset.totalLength, -1)
        self.assertEqual(sset.numRecords, -1)
        sset.updateCounts()
        self.assertEqual(sset.totalLength, 85774)
        self.assertEqual(sset.numRecords, 59)
Developer: PacificBiosciences, Project: pbcore, Lines: 59, Source file: test_pbdataset_subtypes.py

Example 9: test_subreadset_split_metadata_element_name

 def test_subreadset_split_metadata_element_name(self):
     fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     log.debug(fn)
     sset = SubreadSet(data.getXml(10),
                       data.getXml(13))
     chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
     self.assertEqual(len(chunks), 2)
     chunks[0].write(fn)
Developer: PacificBiosciences, Project: pbcore, Lines: 8, Source file: test_pbdataset_split.py

Example 10: test_subreadset_split_metadata_element_name

 def test_subreadset_split_metadata_element_name(self):
     fn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     log.debug(fn)
     sset = SubreadSet("/pbi/dept/secondary/siv/testdata/"
                       "SA3-Sequel/phi29/315/3150101/"
                       "r54008_20160219_002905/1_A01/"
                       "m54008_160219_003234.subreadset.xml")
     chunks = sset.split(chunks=5, zmws=False, ignoreSubDatasets=True)
     chunks[0].write(fn)
Developer: pb-sliang, Project: pbcore, Lines: 9, Source file: test_pbdataset_subtypes.py

Example 11: setUpClass

 def setUpClass(cls):
     tmp_bam = tempfile.NamedTemporaryFile(suffix=".subreads.bam").name
     shutil.copyfile(pbcore.data.getUnalignedBam(), tmp_bam)
     shutil.copyfile(pbcore.data.getUnalignedBam()+".pbi", tmp_bam+".pbi")
     ds = SubreadSet(tmp_bam, pbcore.data.getUnalignedBam(), strict=True)
     ds.write(cls.INPUT_FILES[0])
     _write_fasta_or_contigset(cls.INPUT_FILES[1], make_faidx=True,
                               ds_class=BarcodeSet)
     super(TestScatterSubreadBAMs, cls).setUpClass()
Developer: WenchaoLin, Project: pbcoretools, Lines: 9, Source file: test_tasks_scatter_gather.py

Example 12: _make_dataset

def _make_dataset(file_name=None, barcodes=None):
    if file_name is None:
        file_name = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    ds = SubreadSet(BAM_FILE, strict=True)
    if barcodes is not None:
        for er in ds.externalResources:
            er.barcodes = barcodes
    ds.write(file_name)
    return file_name
Developer: vrainish-pacbio, Project: pbreports, Lines: 9, Source file: test_pbreports_report_barcode.py

Example 13: test_provenance_record_ordering

 def test_provenance_record_ordering(self):
     import pbtestdata
     ds = SubreadSet(pbtestdata.get_file("subreads-sequel"), strict=True)
     ds.metadata.addParentDataSet(uuid.uuid4(), ds.datasetType, createdBy="AnalysisJob", timeStampedName="")
     tmp_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     ds.write(tmp_out)
     ds = SubreadSet(tmp_out, strict=True)
     tags = [r['tag'] for r in ds.metadata.record['children']]
     self.assertEqual(tags, ['TotalLength', 'NumRecords', 'Provenance', 'Collections', 'SummaryStats'])
Developer: PacificBiosciences, Project: pbcore, Lines: 9, Source file: test_pbdataset_subtypes.py

Example 14: to_report

def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)

    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")


    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
Developer: mdsmith, Project: pbreports, Lines: 56, Source file: filter_stats_xml.py
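Similarly, a hypothetical call to to_report from Example 14; both arguments are placeholders, and the input is expected to be a SubreadSet XML (or an sts.xml with summary statistics) plus an existing directory where the plot images will be written:

report = to_report("input.subreadset.xml", "/tmp/report_plots", dpi=72)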

Example 15: test_get_dataset_uuid

 def test_get_dataset_uuid(self):
     ds = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     ds_file = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     ds.write(ds_file)
     uuid = getDataSetUuid(ds_file)
     self.assertEqual(uuid, ds.uuid)
     with open(ds_file, "w") as out:
         out.write("hello world!")
     uuid = getDataSetUuid(ds_file)
     self.assertEqual(uuid, None)
Developer: PacificBiosciences, Project: pbcore, Lines: 10, Source file: test_pbdataset_utils.py


Note: The pbcore.io.SubreadSet class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective authors, who retain copyright of the source code; consult each project's license before redistributing or reusing it. Do not reproduce this article without permission.