当前位置: 首页>>代码示例>>Python>>正文


Python SubreadSet.write方法代码示例

本文整理汇总了Python中pbcore.io.SubreadSet.write方法的典型用法代码示例。如果您正苦于以下问题:Python SubreadSet.write方法的具体用法?Python SubreadSet.write怎么用?Python SubreadSet.write使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pbcore.io.SubreadSet的用法示例。


在下文中一共展示了SubreadSet.write方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: run_bax_to_bam

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def run_bax_to_bam(input_file_name, output_file_name):
    """
    Convert an HdfSubreadSet into a SubreadSet XML at output_file_name.

    When the resource readers report at most one movie name, the whole input
    is handed to _run_bax_to_bam in a single call and its return value is
    returned as is.  When several movies are present, each external file is
    converted separately into the output directory and the per-file results
    are combined into one SubreadSet, which is then written.

    Returns 0 after a successful multi-movie merge; otherwise whatever
    _run_bax_to_bam returned.
    """
    with HdfSubreadSet(input_file_name) as ds_in:
        movie_names = {reader.movieName for reader in ds_in.resourceReaders()}
        if len(movie_names) <= 1:
            return _run_bax_to_bam(input_file_name, output_file_name)

        target_dir = os.path.dirname(output_file_name)
        converted = []
        for bax_path in ds_in.toExternalFiles():
            # Drop the last two dot-separated components of the file name
            # before attaching the intermediate dataset suffix.
            stem = ".".join(os.path.basename(bax_path).split(".")[:-2])
            partial_xml = os.path.join(target_dir, stem + ".hdfsubreadset.xml")
            exit_code = _run_bax_to_bam(bax_path, partial_xml)
            if exit_code != 0:
                log.error("bax2bam failed")
                return exit_code
            converted.append(partial_xml)

        merged = SubreadSet(*converted)
        merged.name = ds_in.name
        if 'Description' in ds_in.objMetadata:
            merged.objMetadata['Description'] = ds_in.objMetadata['Description']
            merged.metadata.merge(ds_in.metadata)
        merged.write(output_file_name)
    return 0
开发者ID:natechols,项目名称:pbcoretools,代码行数:28,代码来源:converters.py

示例2: run

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def run(subreadset, fofn):
    """
    Split a SubreadSet into chunk datasets and list them in a FOFN.

    Each chunk is written into the current working directory as
    chunk_NNN.subreadset.xml (without validation), and the chunk paths are
    then written to `fofn`, one per line.
    """
    import pprint

    out_dir = os.getcwd()
    maxChunks = 0
    source = SubreadSet(subreadset, strict=True)
    resources = source.toFofn()
    log.info('resources in {!r}:\n{}'.format(subreadset, pprint.pformat(resources)))
    total_records = len(source)
    # HG with 70x coverage => 200G bases total
    #   50000 reads/chunk @ 20k/read => 1G bases, ~300MB .gz => ~200 chunks for Human
    #  500000 reads/chunk @ 20k/read => 10G bases, ~3GB .gz => ~20 chunks for Human
    # and we expect about 7-10min per chunk.
    reads_per_chunk = 500000
    chunks = total_records // reads_per_chunk
    log.info('num_chunks={:g} ({:g} / {:g})'.format(chunks, total_records, reads_per_chunk))
    log.info('Splitting with dset.split(zmws=False, chunks={}, ignoreSubDatasets=True, maxChunks={},)'.format(
        chunks, maxChunks))
    # Counts are refreshed per chunk below, so the split itself skips them.
    # (targetSize=1, breakContigs=True were left disabled by the author.)
    pieces = source.split(
        zmws=False,
        chunks=chunks,
        ignoreSubDatasets=True,
        maxChunks=maxChunks,
        updateCounts=False,
    )

    chunk_fns = []
    for index, piece in enumerate(pieces):
        chunk_name = 'chunk_{:03d}.subreadset.xml'.format(index) # TODO: 02
        chunk_fn = os.path.join(out_dir, chunk_name)
        piece.updateCounts()
        piece.write(chunk_fn, validate=False) # , relPaths=True
        chunk_fns.append(chunk_fn)

    with open(fofn, 'w') as ofs:
        ofs.writelines('{}\n'.format(fn) for fn in chunk_fns)
    log.info('Wrote {} chunks into "{}"'.format(len(pieces), fofn))
开发者ID:PacificBiosciences,项目名称:FALCON-polish,代码行数:34,代码来源:run_bam_scatter.py

示例3: test_subreadset_metadata_element_name

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
    def test_subreadset_metadata_element_name(self):
        """
        A written SubreadSet XML has exactly one DataSetMetadata child and no
        SubreadSetMetadata child, whether or not the metadata was modified
        before writing.
        """
        namespace = '{http://pacificbiosciences.com/PacBioDatasets.xsd}'

        def check_metadata_tags(xml_path):
            # Parse once and count the direct children of the root by tag.
            root = ET.parse(xml_path).getroot()
            self.assertEqual(
                len(root.findall(namespace + 'SubreadSetMetadata')), 0)
            self.assertEqual(
                len(root.findall(namespace + 'DataSetMetadata')), 1)

        # without touching the element:
        untouched = SubreadSet(data.getXml(10))
        log.debug(data.getXml(10))
        untouched_xml = tempfile.NamedTemporaryFile(
            suffix=".subreadset.xml").name
        log.debug(untouched_xml)
        untouched.write(untouched_xml)
        check_metadata_tags(untouched_xml)

        # with touching the element:
        touched = SubreadSet(data.getXml(10))
        touched.metadata.description = 'foo'
        touched_xml = tempfile.NamedTemporaryFile(
            suffix=".subreadset.xml").name
        touched.write(touched_xml, validate=False)
        check_metadata_tags(touched_xml)
开发者ID:mdsmith,项目名称:pbcore,代码行数:33,代码来源:test_pbdataset_subtypes.py

示例4: test_de_novo

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
    def test_de_novo(self):
        """
        Build collection metadata from scratch on a SubreadSet and write it.

        A new CollectionMetadata is appended to a dataset whose collections
        are initially falsy.  Fields are then set both through the local
        object and through the dataset's collections list, each value is read
        back through the dataset, and the result is written without
        validation.
        """
        ofn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        log.info(ofn)
        ss = SubreadSet(data.getXml(10))
        col = CollectionMetadata()
        self.assertFalse(ss.metadata.collections)

        ss.metadata.collections.append(col)
        self.assertTrue(ss.metadata.collections)

        # These checks use assertEqual: assertTrue(value, expected) would
        # treat `expected` as the failure message and only test truthiness.
        col.cellIndex = 1
        # int() keeps the comparison independent of whether the accessor
        # hands the value back as 1 or as '1'.
        self.assertEqual(1, int(ss.metadata.collections[0].cellIndex))

        col.instrumentName = "foo"
        self.assertEqual("foo", ss.metadata.collections[0].instrumentName)

        col.context = 'bar'
        self.assertEqual("bar", ss.metadata.collections[0].context)

        ss.metadata.collections[0].runDetails.name = 'foo'
        self.assertEqual('foo', ss.metadata.collections[0].runDetails.name)

        ss.metadata.collections[0].wellSample.name = 'bar'
        self.assertEqual('bar', ss.metadata.collections[0].wellSample.name)

        ss.metadata.collections[0].wellSample.wellName = 'baz'
        self.assertEqual('baz', ss.metadata.collections[0].wellSample.wellName)

        ss.metadata.collections[0].wellSample.concentration = 'baz'
        self.assertEqual('baz',
                         ss.metadata.collections[0].wellSample.concentration)
        ss.write(ofn, validate=False)
开发者ID:pb-sliang,项目名称:pbcore,代码行数:34,代码来源:test_pbdataset_metadata.py

示例5: test_subreadset_from_bam

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def test_subreadset_from_bam(self):
     """Wrap the unaligned BAM in a non-strict SubreadSet and write it out."""
     # DONE control experiment for bug 28698
     dataset = SubreadSet(upstreamData.getUnalignedBam(), strict=False)
     xml_path = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     log.debug(xml_path)
     dataset.write(xml_path)
开发者ID:vrainish-pacbio,项目名称:pbcore,代码行数:9,代码来源:test_pbdataset_subtypes.py

示例6: split_dataset

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def split_dataset(subreadset, out_prefix):
    """
    Write one SubreadSet XML per resource of the input dataset.

    Every entry returned by the input dataset's toFofn() becomes its own
    dataset with a new UUID and a deep copy of the input's filters, saved as
    <absolute out_prefix>.<zero-padded index>.subreadset.xml.

    Returns the list of XML paths written, in resource order.

    A filtered dataset for trying this out can be created with:
    dataset create --type SubreadSet test.subreadset.xml subreads1.bam subreads2.bam
    dataset filter test.subreadset.xml test.filtered.subreadset.xml 'length>1000'
    """
    prefix = os.path.abspath(out_prefix)

    parent = SubreadSet(subreadset, strict=True)
    resource_paths = parent.toFofn()

    log.info('resources in {!r}:\n{}'.format(subreadset, '\n'.join(resource_paths)))

    written = []
    for index, resource in enumerate(resource_paths):
        target = '{}.{:05}.subreadset.xml'.format(prefix, index)
        child = SubreadSet(resource)
        child.newUuid()
        # Deep copy so each child owns its filters independently of the parent.
        child._filters = copy.deepcopy(parent._filters)
        child.write(target)
        written.append(target)

    return written
开发者ID:pb-cdunn,项目名称:FALCON,代码行数:29,代码来源:dataset_split.py

示例7: setUpClass

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def setUpClass(cls):
     """Write the two input files: a SubreadSet XML and a 1010-record FASTA."""
     super(TestToolContract, cls).setUpClass()
     SubreadSet(BAM_FILE, strict=True).write(cls.INPUT_FILES[0])
     sequence = "A" * 16
     with FastaWriter(cls.INPUT_FILES[1]) as fasta:
         # Records are named 0000_Forward .. 1009_Forward, all sharing one sequence.
         for index in range(1010):
             fasta.writeRecord("%04d_Forward" % index, sequence)
开发者ID:tkerelska,项目名称:pbreports,代码行数:9,代码来源:test_pbreports_report_barcode.py

示例8: run_bam_to_bam

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def run_bam_to_bam(subread_set_file, barcode_set_file, output_file_name,
                   nproc=1):
    """
    Run bam2bam with barcodes over every resource of a SubreadSet.

    For each external resource of the input dataset, bam2bam is invoked on
    its subreads and scraps BAM files.  The resulting <prefix>.subreads.bam
    and <prefix>.scraps.bam are registered in a new SubreadSet, which takes
    over the input's filters and is written to output_file_name.

    :param subread_set_file: path to the input SubreadSet; relative BAM paths
        are resolved against its directory
    :param barcode_set_file: path to a BarcodeSet with a single resource
    :param output_file_name: path of the SubreadSet to write; bam2bam output
        files are placed in the same directory
    :param nproc: value passed to both the -j and -b options of bam2bam
    :returns: 0 on success, or the exit code of the first failing bam2bam run
    :raises NotImplementedError: if the BarcodeSet has more than one resource
        reader
    :raises TypeError: if an external resource has no scraps file
    """
    bc = BarcodeSet(barcode_set_file)
    if len(bc.resourceReaders()) > 1:
        raise NotImplementedError("Multi-FASTA BarcodeSet input is not supported.")
    barcode_fasta = bc.toExternalFiles()[0]
    with SubreadSet(subread_set_file) as ds:
        # TODO(nechols)(2016-03-15): replace with BarcodedSubreadSet
        ds_new = SubreadSet(strict=True)
        for ext_res in ds.externalResources:
            subreads_bam = ext_res.bam
            scraps_bam = ext_res.scraps
            assert subreads_bam is not None
            if scraps_bam is None:
                raise TypeError("The input SubreadSet must include scraps.")
            # The dots are escaped so that only a literal ".subreads.bam" is
            # replaced; an unescaped "." would match any character.
            new_prefix = op.join(op.dirname(output_file_name),
                re.sub(r"\.subreads\.bam", "_barcoded",
                       op.basename(subreads_bam)))
            if not op.isabs(subreads_bam):
                subreads_bam = op.join(op.dirname(subread_set_file),
                    subreads_bam)
            if not op.isabs(scraps_bam):
                scraps_bam = op.join(op.dirname(subread_set_file), scraps_bam)
            args = [
                "bam2bam",
                "-j", str(nproc),
                "-b", str(nproc),
                "-o", new_prefix,
                "--barcodes", barcode_fasta,
                subreads_bam, scraps_bam
            ]
            log.info(" ".join(args))
            # NOTE(review): the command is handed over as one space-joined
            # string, so paths containing spaces would presumably be split;
            # confirm how run_cmd parses its argument.
            result = run_cmd(" ".join(args),
                             stdout_fh=sys.stdout,
                             stderr_fh=sys.stderr)
            if result.exit_code != 0:
                return result.exit_code
            # From here on the names refer to the files bam2bam produced.
            subreads_bam = new_prefix + ".subreads.bam"
            scraps_bam = new_prefix + ".scraps.bam"
            assert op.isfile(subreads_bam), "Missing {f}".format(f=subreads_bam)
            # FIXME we need a more general method for this
            # Subreads resource, with the scraps resource nested inside it.
            ext_res_new = ExternalResource()
            ext_res_new.resourceId = subreads_bam
            ext_res_new.metaType = 'PacBio.SubreadFile.SubreadBamFile'
            ext_res_new.addIndices([subreads_bam + ".pbi"])
            ext_res_inner = ExternalResources()
            ext_res_scraps = ExternalResource()
            ext_res_scraps.resourceId = scraps_bam
            ext_res_scraps.metaType = 'PacBio.SubreadFile.ScrapsBamFile'
            ext_res_scraps.addIndices([scraps_bam + ".pbi"])
            ext_res_inner.append(ext_res_scraps)
            ext_res_new.append(ext_res_inner)
            ds_new.externalResources.append(ext_res_new)
        # Callbacks are cleared before the filters object is shared with the
        # new dataset.
        ds._filters.clearCallbacks()
        ds_new._filters = ds._filters
        ds_new._populateMetaTypes()
        ds_new.updateCounts()
        ds_new.write(output_file_name)
    return 0
开发者ID:mpkocher,项目名称:pbcoretools,代码行数:61,代码来源:converters.py

示例9: setUpClass

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def setUpClass(cls):
     """
     Create the input files before the parent class setup runs: a SubreadSet
     over a temporary copy of the unaligned BAM plus the BAM itself, and a
     second input written by _write_fasta_or_contigset.
     """
     bam_copy = tempfile.NamedTemporaryFile(suffix=".subreads.bam").name
     source_bam = pbcore.data.getUnalignedBam()
     # Copy the BAM and then its .pbi index alongside it.
     for extension in ("", ".pbi"):
         shutil.copyfile(source_bam + extension, bam_copy + extension)
     dataset = SubreadSet(bam_copy, source_bam, strict=True)
     dataset.write(cls.INPUT_FILES[0])
     _write_fasta_or_contigset(cls.INPUT_FILES[1], make_faidx=True,
                               ds_class=BarcodeSet)
     super(TestScatterSubreadBAMs, cls).setUpClass()
开发者ID:WenchaoLin,项目名称:pbcoretools,代码行数:11,代码来源:test_tasks_scatter_gather.py

示例10: _make_dataset

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def _make_dataset(file_name=None, barcodes=None):
    """
    Write a SubreadSet built from BAM_FILE and return the path of the XML.

    file_name defaults to a temporary *.subreadset.xml path.  When barcodes
    is given, it is assigned to the `barcodes` attribute of every external
    resource before the dataset is written.
    """
    target = file_name
    if target is None:
        target = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
    dataset = SubreadSet(BAM_FILE, strict=True)
    if barcodes is not None:
        for resource in dataset.externalResources:
            resource.barcodes = barcodes
    dataset.write(target)
    return target
开发者ID:vrainish-pacbio,项目名称:pbreports,代码行数:11,代码来源:test_pbreports_report_barcode.py

示例11: test_provenance_record_ordering

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def test_provenance_record_ordering(self):
     """
     After adding a parent dataset and round-tripping through XML, the
     metadata children appear in the expected tag order.
     """
     import pbtestdata
     expected_tags = ['TotalLength', 'NumRecords', 'Provenance', 'Collections', 'SummaryStats']
     original = SubreadSet(pbtestdata.get_file("subreads-sequel"), strict=True)
     original.metadata.addParentDataSet(
         uuid.uuid4(),
         original.datasetType,
         createdBy="AnalysisJob",
         timeStampedName="")
     xml_path = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     original.write(xml_path)
     reloaded = SubreadSet(xml_path, strict=True)
     observed_tags = [child['tag'] for child in reloaded.metadata.record['children']]
     self.assertEqual(observed_tags, expected_tags)
开发者ID:PacificBiosciences,项目名称:pbcore,代码行数:11,代码来源:test_pbdataset_subtypes.py

示例12: test_get_dataset_uuid

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def test_get_dataset_uuid(self):
     """
     getDataSetUuid returns the dataset's UUID for a written XML file, and
     None once that file has been overwritten with plain text.
     """
     dataset = SubreadSet(upstreamdata.getUnalignedBam(), strict=True)
     xml_path = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     dataset.write(xml_path)
     self.assertEqual(getDataSetUuid(xml_path), dataset.uuid)
     # Replace the XML with content that is not a dataset.
     with open(xml_path, "w") as handle:
         handle.write("hello world!")
     self.assertEqual(getDataSetUuid(xml_path), None)
开发者ID:PacificBiosciences,项目名称:pbcore,代码行数:12,代码来源:test_pbdataset_utils.py

示例13: run_bam_to_bam

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
def run_bam_to_bam(subread_set_file, barcode_set_file, output_file_name,
                   nproc=1, score_mode="symmetric"):
    """
    Run bam2bam with barcodes over every resource of a SubreadSet.

    For each external resource of the input dataset, bam2bam is invoked on
    its subreads and scraps BAM files.  The resulting <prefix>.subreads.bam
    and <prefix>.scraps.bam are added to a new SubreadSet together with the
    barcode set.  The new dataset takes over the input's filters and
    metadata, is named "<input name> (barcoded)", gets a new UUID and is
    written to output_file_name.

    :param subread_set_file: path to the input SubreadSet; relative BAM paths
        are resolved against its directory
    :param barcode_set_file: path to a BarcodeSet with a single resource
    :param output_file_name: path of the SubreadSet to write; bam2bam output
        files are placed in the same directory
    :param nproc: value passed to both the -j and -b options of bam2bam
    :param score_mode: "symmetric" or "asymmetric", passed as --scoreMode
    :returns: 0 on success, or the exit code of the first failing bam2bam run
    :raises ValueError: if score_mode is not one of the two accepted values
    :raises NotImplementedError: if the BarcodeSet has more than one resource
        reader
    :raises TypeError: if an external resource has no scraps file
    """
    if score_mode not in ["asymmetric", "symmetric"]:
        raise ValueError("Unrecognized score mode '{m}'".format(m=score_mode))
    bc = BarcodeSet(barcode_set_file)
    if len(bc.resourceReaders()) > 1:
        raise NotImplementedError("Multi-FASTA BarcodeSet input is not supported.")
    barcode_fasta = bc.toExternalFiles()[0]
    with SubreadSet(subread_set_file) as ds:
        ds_new = SubreadSet(strict=True)
        for ext_res in ds.externalResources:
            subreads_bam = ext_res.bam
            scraps_bam = ext_res.scraps
            assert subreads_bam is not None
            if scraps_bam is None:
                raise TypeError("The input SubreadSet must include scraps.")
            # The dots are escaped so that only a literal ".subreads.bam" is
            # replaced; an unescaped "." would match any character.
            new_prefix = op.join(op.dirname(output_file_name),
                re.sub(r"\.subreads\.bam", "_barcoded",
                       op.basename(subreads_bam)))
            if not op.isabs(subreads_bam):
                subreads_bam = op.join(op.dirname(subread_set_file),
                    subreads_bam)
            if not op.isabs(scraps_bam):
                scraps_bam = op.join(op.dirname(subread_set_file), scraps_bam)
            args = [
                "bam2bam",
                "-j", str(nproc),
                "-b", str(nproc),
                "-o", new_prefix,
                "--barcodes", barcode_fasta,
                "--scoreMode", score_mode,
                subreads_bam, scraps_bam
            ]
            log.info(" ".join(args))
            # NOTE(review): the command is handed over as one space-joined
            # string, so paths containing spaces would presumably be split;
            # confirm how run_cmd parses its argument.
            result = run_cmd(" ".join(args),
                             stdout_fh=sys.stdout,
                             stderr_fh=sys.stderr)
            if result.exit_code != 0:
                return result.exit_code
            # From here on the names refer to the files bam2bam produced.
            subreads_bam = new_prefix + ".subreads.bam"
            scraps_bam = new_prefix + ".scraps.bam"
            assert op.isfile(subreads_bam), "Missing {f}".format(f=subreads_bam)
            add_subread_resources(ds_new,
                subreads=subreads_bam,
                scraps=scraps_bam,
                barcodes=barcode_set_file)
        # Callbacks are cleared before the filters object is shared with the
        # new dataset.
        ds._filters.clearCallbacks()
        ds_new._filters = ds._filters
        ds_new._populateMetaTypes()
        ds_new.metadata = ds.metadata
        ds_new.name = ds.name + " (barcoded)"
        ds_new.updateCounts()
        ds_new.newUuid()
        ds_new.write(output_file_name)
    return 0
开发者ID:Debian,项目名称:pbcoretools,代码行数:56,代码来源:converters.py

示例14: test_subreads_parent_dataset

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
 def test_subreads_parent_dataset(self):
     """
     A dataset loaded from XML reports its parent dataset UUID; one built
     straight from the BAM has none until addParentDataSet is called.
     """
     parent_uuid = "f81cf391-b3da-41f8-84cb-a0de71f460f4"

     from_xml = SubreadSet(data.getXml(no=5), skipMissing=True)
     self.assertEqual(
         from_xml.metadata.provenance.parentDataSet.uniqueId, parent_uuid)

     from_bam = SubreadSet(from_xml.externalResources[0].bam,
                           skipMissing=True)
     self.assertEqual(
         from_bam.metadata.provenance.parentDataSet.uniqueId, None)

     from_bam.metadata.addParentDataSet(parent_uuid,
                                        "PacBio.DataSet.SubreadSet",
                                        "timestamped_name")
     self.assertEqual(
         from_bam.metadata.provenance.parentDataSet.uniqueId, parent_uuid)

     xml_out = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
     from_bam.write(xml_out, validate=False)
开发者ID:PacificBiosciences,项目名称:pbcore,代码行数:15,代码来源:test_pbdataset_subtypes.py

示例15: test_de_novo

# Required import: from pbcore.io import SubreadSet [as alias]
# Note: write() is a method of SubreadSet and cannot be imported on its own; call it on a SubreadSet instance.
    def test_de_novo(self):
        """
        Build collection and biosample metadata from scratch and write it.

        A new CollectionMetadata is appended to a dataset whose collections
        are initially falsy, its fields are set and read back through the
        dataset, then a biosample with one DNA barcode is added and the
        'BioSamples' tag is checked to occur exactly once.  The dataset is
        finally written without validation.
        """
        ofn = tempfile.NamedTemporaryFile(suffix=".subreadset.xml").name
        log.info(ofn)
        ss = SubreadSet(data.getXml(10))
        col = CollectionMetadata()
        self.assertFalse(ss.metadata.collections)

        ss.metadata.collections.append(col)
        self.assertTrue(ss.metadata.collections)

        # These checks use assertEqual: assertTrue(value, expected) would
        # treat `expected` as the failure message and only test truthiness.
        col.cellIndex = 1
        # int() keeps the comparison independent of whether the accessor
        # hands the value back as 1 or as '1'.
        self.assertEqual(1, int(ss.metadata.collections[0].cellIndex))

        col.instrumentName = "foo"
        self.assertEqual("foo", ss.metadata.collections[0].instrumentName)

        col.context = 'bar'
        self.assertEqual("bar", ss.metadata.collections[0].context)

        ss.metadata.collections[0].runDetails.name = 'foo'
        self.assertEqual('foo', ss.metadata.collections[0].runDetails.name)

        ss.metadata.collections[0].wellSample.name = 'bar'
        self.assertEqual('bar', ss.metadata.collections[0].wellSample.name)

        ss.metadata.collections[0].wellSample.wellName = 'baz'
        self.assertEqual('baz', ss.metadata.collections[0].wellSample.wellName)

        ss.metadata.collections[0].wellSample.concentration = 'baz'
        self.assertEqual('baz',
                         ss.metadata.collections[0].wellSample.concentration)

        # There are no existing biosamples:
        self.assertFalse(
            'BioSamples' in ss.metadata.tags)
        # Therefore the metadata is falsy
        self.assertFalse(ss.metadata.bioSamples)

        ss.metadata.bioSamples.addSample('Clown')
        self.assertEqual('Clown', ss.metadata.bioSamples[0].name)

        ss.metadata.bioSamples[0].DNABarcodes.addBarcode('Dentist')
        self.assertEqual('Dentist',
                         ss.metadata.bioSamples[0].DNABarcodes[0].name)

        # check that we are adding one additional biosamples element:
        self.assertEqual(Counter(ss.metadata.tags)['BioSamples'], 1)
        # Therefore the metadata is truthy
        self.assertTrue(ss.metadata.bioSamples)
        ss.write(ofn, validate=False)
开发者ID:PacificBiosciences,项目名称:pbcore,代码行数:52,代码来源:test_pbdataset_metadata.py


注:本文中的pbcore.io.SubreadSet.write方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。