Python io.ContigSet类代码示例

本文整理汇总了Python中pbcore.io.ContigSet类的典型用法代码示例。如果您正苦于以下问题：Python ContigSet类的具体用法？Python ContigSet怎么用？Python ContigSet使用的例子？那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了ContigSet类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: resolved_tool_contract_runner

def resolved_tool_contract_runner(resolved_contract):
    rc = resolved_contract
    alignment_path = rc.task.input_files[0]
    reference_path = rc.task.input_files[1]
    gff_path = rc.task.output_files[0]
    dataset_path = rc.task.output_files[1]
    fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
    fastq_path = rc.task.output_files[2]
    args = [
        alignment_path,
        "--verbose",
        "--reference", reference_path,
        "--outputFilename", gff_path,
        "--outputFilename", fasta_path,
        "--outputFilename", fastq_path,
        "--numWorkers", str(rc.task.nproc),
        "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
        "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
        "--algorithm", rc.task.options[Constants.ALGORITHM_ID],
        "--alignmentSetRefWindows",
    ]
    if rc.task.options[Constants.DIPLOID_MODE_ID]:
        args.append("--diploid")
    args_ = get_parser().arg_parser.parser.parse_args(args)
    rc = args_runner(args_)
    if rc == 0:
        pysam.faidx(fasta_path)
        ds = ContigSet(fasta_path, strict=True)
        ds.write(dataset_path)
    return rc

开发者ID:lpp1985，项目名称:lpp_Script，代码行数:30，代码来源:main.py

示例2: run

def run(referenceset, fastq, gff, fasta, contigset, alignmentset, options, log_level):
    #'--log-file foo.log',
    #'--verbose',
    #'--debug', # requires 'ipdb'
    #'-j NWORKERS',
    #'--algorithm quiver',
    #'--diploid', # binary
    #'--minConfidence 40',
    #'--minCoverage 5',
    #'--alignmentSetRefWindows',
    cmd = "variantCaller --log-level {log_level} {options} --referenceFilename {referenceset} -o {fastq} -o {gff} -o {fasta} {alignmentset}"
    system(cmd.format(**locals()))
    try:
        say('Converting fasta {!r} to contigset {!r}'.format(fasta, contigset))
        # Convert to contigset.xml

        import pysam
        pysam.faidx(fasta) # pylint: disable=no-member
        # I do not know why pylint does not see this defined.

        ds = ContigSet(fasta, strict=True)
        ds.write(contigset, relPaths=True)
        say('Successfully wrapped fasta {!r} in contigset {!r}'.format(fasta, contigset))
    except Exception:
        say(traceback.format_exc())
        say('Skipping conversion to contigset.')

开发者ID:PacificBiosciences，项目名称:FALCON-polish，代码行数:26，代码来源:run_variantCaller.py

示例3: resolved_tool_contract_runner

def resolved_tool_contract_runner(resolved_contract):
    rc = resolved_contract
    alignment_path = rc.task.input_files[0]
    reference_path = rc.task.input_files[1]
    gff_path = rc.task.output_files[0]
    vcf_path = rc.task.output_files[1]
    dataset_path = rc.task.output_files[2]
    fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
    fastq_path = rc.task.output_files[3]
    args = [
        alignment_path,
        "--verbose",
        "--reference", reference_path,
        "--outputFilename", gff_path,
        "--outputFilename", fasta_path,
        "--outputFilename", fastq_path,
        "--outputFilename", vcf_path,
        "--numWorkers", str(rc.task.nproc),
        "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
        "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
        "--maskRadius", str(Constants.DEFAULT_MASK_RADIUS) if \
                        bool(rc.task.options[Constants.MASKING_ID]) else "0",
        "--algorithm", rc.task.options[Constants.ALGORITHM_ID],
        "--alignmentSetRefWindows",
    ]
    args_ = get_parser().arg_parser.parser.parse_args(args)
    rc = args_runner(args_)
    if rc == 0:
        pysam.faidx(fasta_path)
        ds = ContigSet(fasta_path, strict=True)
        ds.write(dataset_path)
    return rc

开发者ID:PacificBiosciences，项目名称:GenomicConsensus，代码行数:32，代码来源:main.py

示例4: _get_fasta_path

def _get_fasta_path(file_name):
    if file_name.endswith(".contigset.xml"):
        ds = ContigSet(file_name)
        fasta_files = ds.toExternalFiles()
        assert len(fasta_files) == 1
        return fasta_files[0]
    return file_name

开发者ID:lpp1985，项目名称:lpp_Script，代码行数:7，代码来源:IcePartial.py

示例5: test_contigset_consolidate_genomic_consensus

 def test_contigset_consolidate_genomic_consensus(self):
     """
     Verify that the contigs output by GenomicConsensus (e.g. quiver) can
     be consolidated.
     """
     FASTA1 = ("lambda_NEB3011_0_60",
         "GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCG")
     FASTA2 = ("lambda_NEB3011_120_180",
         "CACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTG")
     FASTA3 = ("lambda_NEB3011_60_120",
         "GTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGT")
     files = []
     for i, (header, seq) in enumerate([FASTA1, FASTA2, FASTA3]):
         _files = []
         for suffix in ["", "|quiver", "|plurality", "|arrow", "|poa"]:
             tmpfile = tempfile.NamedTemporaryFile(suffix=".fasta").name
             with open(tmpfile, "w") as f:
                 f.write(">{h}{s}\n{q}".format(h=header, s=suffix, q=seq))
             _files.append(tmpfile)
         files.append(_files)
     for i in range(3):
         ds = ContigSet(*[f[i] for f in files])
         out1 = tempfile.NamedTemporaryFile(suffix=".contigset.xml").name
         fa1 = tempfile.NamedTemporaryFile(suffix=".fasta").name
         ds.consolidate(fa1)
         ds.write(out1)
         with ContigSet(out1) as ds_new:
             self.assertEqual(len([rec for rec in ds_new]), 1,
                              "failed on %d" % i)

开发者ID:vrainish-pacbio，项目名称:pbcore，代码行数:29，代码来源:test_pbdataset_subtypes.py

示例6: _write_fasta_or_contigset

def _write_fasta_or_contigset(file_name):
    fasta_file = re.sub(".contigset.xml", ".fasta", file_name)
    rec = [">chr%d\nacgtacgtacgt" % x for x in range(251)]
    with open(fasta_file, "w") as f:
        f.write("\n".join(rec))
    if file_name.endswith(".xml"):
        cs = ContigSet(fasta_file)
        cs.write(file_name)

开发者ID:gconcepcion，项目名称:pbsmrtpipe，代码行数:8，代码来源:test_tools_dev_tasks.py

示例7: makeReport

def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, tables, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary TXT file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file.

    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Collect read lengths of
    reader = ContigSet(inReadsFN)
    rs = [len(r.sequence) for r in reader]
    reader.close()
    readlengths = np.array(rs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 title="Read Length of Consensus Isoforms Reads",
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    dataset_uuids = [ContigSet(inReadsFN).uuid]
    if inSummaryFN.endswith(".json"):
        attributes = _report_to_attributes(inSummaryFN)
        r = load_report_from_json(inSummaryFN)
        # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
        # reports; should we be?
    else:
        attributes = summaryToAttributes(inSummaryFN)

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report is consist of ID, tables, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    title="Transcript Clustering",
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return report

开发者ID:vrainish-pacbio，项目名称:pbreports，代码行数:56，代码来源:isoseq_cluster.py

示例8: __gather_contigset

def __gather_contigset(resource_file_extension, input_files, output_file,
                       new_resource_file=None,
                       skip_empty=True):
    """
    :param input_files: List of file paths
    :param output_file: File Path
    :param new_resource_file: the path of the file to which the other contig
                              files are consolidated
    :param skip_empty: Ignore empty files (doesn't do much yet)

    :return: Output file

    :rtype: str
    """
    if skip_empty:
        _input_files = []
        for file_name in input_files:
            cs = ContigSet(file_name)
            if len(cs.toExternalFiles()) > 0:
                _input_files.append(file_name)
        input_files = _input_files
    tbr = ContigSet(*input_files)
    if not new_resource_file:
        if output_file.endswith('xml'):
            new_resource_file = output_file[:-3] + resource_file_extension
    tbr.consolidate(new_resource_file)
    tbr.newUuid()
    tbr.write(output_file)
    return output_file

开发者ID:lpp1985，项目名称:lpp_Script，代码行数:29，代码来源:gather.py

示例9: _write_fasta_or_contigset

def _write_fasta_or_contigset(file_name, make_faidx=False, n_records=251):
    fasta_file = re.sub(".contigset.xml", ".fasta", file_name)
    rec = [">chr%d\nacgtacgtacgt" % x for x in range(n_records)]
    with open(fasta_file, "w") as f:
        f.write("\n".join(rec))
        f.flush()
    if make_faidx:
        pysam.faidx(fasta_file)
    if file_name.endswith(".xml"):
        cs = ContigSet(fasta_file, strict=make_faidx)
        cs.write(file_name)

开发者ID:mpkocher，项目名称:pbcoretools，代码行数:11，代码来源:test_tasks_scatter_gather.py

示例10: write_contigset_records

def write_contigset_records(pbcore_writer_class, records, file_name):
    """
    Writes the Chunked fasta files and Writes a ContigSet.xml

    Filename has contigset.xml

    """
    fasta_file_name = ".".join(file_name.split(".")[:-2]) + ".fasta"
    write_pbcore_records(pbcore_writer_class, records, fasta_file_name)
    log.debug("Writing ContigSet XML to {f}".format(f=file_name))
    ds = ContigSet(fasta_file_name)
    ds.write(file_name)

开发者ID:Debian，项目名称:pbcoretools，代码行数:12，代码来源:chunk_utils.py

示例11: test_contigset_consolidate_int_names

    def test_contigset_consolidate_int_names(self):
        #build set to merge
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')
        outFas1 = os.path.join(outdir, 'tempfile1.fasta')
        outFas2 = os.path.join(outdir, 'tempfile2.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
                      i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
                      o=inFas))
        rs1 = ContigSet(inFas)

        double = 'B.cereus.1'
        exp_double = rs1.get_contig(double)

        # todo: modify the names first:
        with FastaWriter(outFas1) as writer:
            writer.writeRecord('5141', exp_double.sequence)
        with FastaWriter(outFas2) as writer:
            writer.writeRecord('5142', exp_double.sequence)

        exp_double_seqs = [exp_double.sequence, exp_double.sequence]
        exp_names = ['5141', '5142']

        obs_file = ContigSet(outFas1, outFas2)
        log.debug(obs_file.toExternalFiles())
        obs_file.consolidate()
        log.debug(obs_file.toExternalFiles())

        # open obs and compare to exp
        for name, seq in zip(exp_names, exp_double_seqs):
            self.assertEqual(obs_file.get_contig(name).sequence[:], seq)

开发者ID:vrainish-pacbio，项目名称:pbcore，代码行数:34，代码来源:test_pbdataset_subtypes.py

示例12: test_contigset_write

 def test_contigset_write(self):
     fasta = upstreamData.getLambdaFasta()
     ds = ContigSet(fasta)
     self.assertTrue(isinstance(ds.resourceReaders()[0],
                                IndexedFastaReader))
     outdir = tempfile.mkdtemp(suffix="dataset-unittest")
     outfn = os.path.join(outdir, 'test.fasta')
     w = FastaWriter(outfn)
     for rec in ds:
         w.writeRecord(rec)
     w.close()
     fas = FastaReader(outfn)
     for rec in fas:
         # make sure a __repr__ didn't slip through:
         self.assertFalse(rec.sequence.startswith('<'))

开发者ID:vrainish-pacbio，项目名称:pbcore，代码行数:15，代码来源:test_pbdataset_subtypes.py

示例13: test_missing_fai_error_message

    def test_missing_fai_error_message(self):
        outdir = tempfile.mkdtemp(suffix="dataset-unittest")

        inFas = os.path.join(outdir, 'infile.fasta')

        # copy fasta reference to hide fai and ensure FastaReader is used
        backticks('cp {i} {o}'.format(
            i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
            o=inFas))
        rs1 = ContigSet(inFas)
        with self.assertRaises(IOError) as cm:
            rs1.assertIndexed()
        self.assertEqual(
            str(cm.exception),
            ( "Companion FASTA index (.fai) file not found or malformatted! "
             "Use 'samtools faidx' to generate FASTA index."))

开发者ID:vrainish-pacbio，项目名称:pbcore，代码行数:16，代码来源:test_pbdataset_subtypes.py

示例14: test_len_fastq

 def test_len_fastq(self):
     fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
           'lambda/2590980/0008/Analysis_Results/'
           'm141115_075238_ethan_c100699872550000001'
           '823139203261572_s1_p0.1.subreads.fastq')
     fq_out = tempfile.NamedTemporaryFile(suffix=".fastq").name
     with open(fq_out, 'w') as fqh:
         with open(fn, 'r') as fih:
             for line in itertools.islice(fih, 24):
                 fqh.write(line)
     cset = ContigSet(fq_out)
     self.assertFalse(cset.isIndexed)
     self.assertTrue(isinstance(cset.resourceReaders()[0], FastqReader))
     self.assertEqual(sum(1 for _ in cset),
                      sum(1 for _ in FastqReader(fq_out)))
     self.assertEqual(sum(1 for _ in cset), 6)

开发者ID:vrainish-pacbio，项目名称:pbcore，代码行数:16，代码来源:test_pbdataset_subtypes.py

示例15: as_contigset

def as_contigset(fasta_file, xml_file):
    if fasta_file == xml_file or xml_file is None:
        if not op.isfile(fasta_file) or op.getsize(fasta_file) == 0:
            return ContigSet()
        return ContigSet(fasta_file)
    file_size = op.getsize(fasta_file)

    fai_file = fasta_file + ".fai"
    if op.exists(fai_file):
        os.remove(fai_file)

    ds = ContigSet(fasta_file, generateIndices=True)
    ds.write(xml_file)
    if not file_size > 0:
        with open(fai_file, "w") as fai:
            fai.write("")
    return ds

开发者ID:ylipacbio，项目名称:pbtranscript，代码行数:17，代码来源:Utils.py

注：本文中的pbcore.io.ContigSet类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。