本文整理汇总了Python中pbcore.io.ContigSet类的典型用法代码示例。如果您正苦于以下问题:Python ContigSet类的具体用法?Python ContigSet怎么用?Python ContigSet使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ContigSet类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: resolved_tool_contract_runner
def resolved_tool_contract_runner(resolved_contract):
rc = resolved_contract
alignment_path = rc.task.input_files[0]
reference_path = rc.task.input_files[1]
gff_path = rc.task.output_files[0]
dataset_path = rc.task.output_files[1]
fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
fastq_path = rc.task.output_files[2]
args = [
alignment_path,
"--verbose",
"--reference", reference_path,
"--outputFilename", gff_path,
"--outputFilename", fasta_path,
"--outputFilename", fastq_path,
"--numWorkers", str(rc.task.nproc),
"--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
"--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
"--algorithm", rc.task.options[Constants.ALGORITHM_ID],
"--alignmentSetRefWindows",
]
if rc.task.options[Constants.DIPLOID_MODE_ID]:
args.append("--diploid")
args_ = get_parser().arg_parser.parser.parse_args(args)
rc = args_runner(args_)
if rc == 0:
pysam.faidx(fasta_path)
ds = ContigSet(fasta_path, strict=True)
ds.write(dataset_path)
return rc
示例2: run
def run(referenceset, fastq, gff, fasta, contigset, alignmentset, options, log_level):
#'--log-file foo.log',
#'--verbose',
#'--debug', # requires 'ipdb'
#'-j NWORKERS',
#'--algorithm quiver',
#'--diploid', # binary
#'--minConfidence 40',
#'--minCoverage 5',
#'--alignmentSetRefWindows',
cmd = "variantCaller --log-level {log_level} {options} --referenceFilename {referenceset} -o {fastq} -o {gff} -o {fasta} {alignmentset}"
system(cmd.format(**locals()))
try:
say('Converting fasta {!r} to contigset {!r}'.format(fasta, contigset))
# Convert to contigset.xml
import pysam
pysam.faidx(fasta) # pylint: disable=no-member
# I do not know why pylint does not see this defined.
ds = ContigSet(fasta, strict=True)
ds.write(contigset, relPaths=True)
say('Successfully wrapped fasta {!r} in contigset {!r}'.format(fasta, contigset))
except Exception:
say(traceback.format_exc())
say('Skipping conversion to contigset.')
示例3: resolved_tool_contract_runner
def resolved_tool_contract_runner(resolved_contract):
rc = resolved_contract
alignment_path = rc.task.input_files[0]
reference_path = rc.task.input_files[1]
gff_path = rc.task.output_files[0]
vcf_path = rc.task.output_files[1]
dataset_path = rc.task.output_files[2]
fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
fastq_path = rc.task.output_files[3]
args = [
alignment_path,
"--verbose",
"--reference", reference_path,
"--outputFilename", gff_path,
"--outputFilename", fasta_path,
"--outputFilename", fastq_path,
"--outputFilename", vcf_path,
"--numWorkers", str(rc.task.nproc),
"--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
"--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
"--maskRadius", str(Constants.DEFAULT_MASK_RADIUS) if \
bool(rc.task.options[Constants.MASKING_ID]) else "0",
"--algorithm", rc.task.options[Constants.ALGORITHM_ID],
"--alignmentSetRefWindows",
]
args_ = get_parser().arg_parser.parser.parse_args(args)
rc = args_runner(args_)
if rc == 0:
pysam.faidx(fasta_path)
ds = ContigSet(fasta_path, strict=True)
ds.write(dataset_path)
return rc
示例4: _get_fasta_path
def _get_fasta_path(file_name):
if file_name.endswith(".contigset.xml"):
ds = ContigSet(file_name)
fasta_files = ds.toExternalFiles()
assert len(fasta_files) == 1
return fasta_files[0]
return file_name
示例5: test_contigset_consolidate_genomic_consensus
def test_contigset_consolidate_genomic_consensus(self):
"""
Verify that the contigs output by GenomicConsensus (e.g. quiver) can
be consolidated.
"""
FASTA1 = ("lambda_NEB3011_0_60",
"GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCG")
FASTA2 = ("lambda_NEB3011_120_180",
"CACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTG")
FASTA3 = ("lambda_NEB3011_60_120",
"GTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGT")
files = []
for i, (header, seq) in enumerate([FASTA1, FASTA2, FASTA3]):
_files = []
for suffix in ["", "|quiver", "|plurality", "|arrow", "|poa"]:
tmpfile = tempfile.NamedTemporaryFile(suffix=".fasta").name
with open(tmpfile, "w") as f:
f.write(">{h}{s}\n{q}".format(h=header, s=suffix, q=seq))
_files.append(tmpfile)
files.append(_files)
for i in range(3):
ds = ContigSet(*[f[i] for f in files])
out1 = tempfile.NamedTemporaryFile(suffix=".contigset.xml").name
fa1 = tempfile.NamedTemporaryFile(suffix=".fasta").name
ds.consolidate(fa1)
ds.write(out1)
with ContigSet(out1) as ds_new:
self.assertEqual(len([rec for rec in ds_new]), 1,
"failed on %d" % i)
示例6: _write_fasta_or_contigset
def _write_fasta_or_contigset(file_name):
fasta_file = re.sub(".contigset.xml", ".fasta", file_name)
rec = [">chr%d\nacgtacgtacgt" % x for x in range(251)]
with open(fasta_file, "w") as f:
f.write("\n".join(rec))
if file_name.endswith(".xml"):
cs = ContigSet(fasta_file)
cs.write(file_name)
示例7: makeReport
def makeReport(inReadsFN, inSummaryFN, outDir):
"""
Generate a report with ID, tables, attributes and plot groups.
inReadsFN --- an input FASTA file which has all consensus
isoforms produced by pbtranscript.py cluster.
This file is required to plot a read length histogram as part of
the report:
consensus_isoforms_readlength_hist.png
inSummaryFN --- a summary TXT file with cluster attributes,
including two attributes:
number of consensus isoforms
average length of consensus isoforms
Attributes of the report are extracted from this file.
"""
log.info("Plotting read length histogram from file: {f}".
format(f=inReadsFN))
# Collect read lengths of
reader = ContigSet(inReadsFN)
rs = [len(r.sequence) for r in reader]
reader.close()
readlengths = np.array(rs)
# Plot read length histogram
readlength_plot = create_readlength_plot(readlengths, outDir)
readlength_group = PlotGroup(Constants.PG_READLENGTH,
title="Read Length of Consensus Isoforms Reads",
plots=[readlength_plot],
thumbnail=readlength_plot.thumbnail)
log.info("Plotting summary attributes from file: {f}".
format(f=inSummaryFN))
# Produce attributes based on summary.
dataset_uuids = [ContigSet(inReadsFN).uuid]
if inSummaryFN.endswith(".json"):
attributes = _report_to_attributes(inSummaryFN)
r = load_report_from_json(inSummaryFN)
# FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
# reports; should we be?
else:
attributes = summaryToAttributes(inSummaryFN)
table = attributesToTable(attributes)
log.info(str(table))
# A report is consist of ID, tables, attributes, and plotgroups.
report = Report(Constants.R_ID,
title="Transcript Clustering",
attributes=attributes,
plotgroups=[readlength_group],
dataset_uuids=dataset_uuids)
return report
示例8: __gather_contigset
def __gather_contigset(resource_file_extension, input_files, output_file,
new_resource_file=None,
skip_empty=True):
"""
:param input_files: List of file paths
:param output_file: File Path
:param new_resource_file: the path of the file to which the other contig
files are consolidated
:param skip_empty: Ignore empty files (doesn't do much yet)
:return: Output file
:rtype: str
"""
if skip_empty:
_input_files = []
for file_name in input_files:
cs = ContigSet(file_name)
if len(cs.toExternalFiles()) > 0:
_input_files.append(file_name)
input_files = _input_files
tbr = ContigSet(*input_files)
if not new_resource_file:
if output_file.endswith('xml'):
new_resource_file = output_file[:-3] + resource_file_extension
tbr.consolidate(new_resource_file)
tbr.newUuid()
tbr.write(output_file)
return output_file
示例9: _write_fasta_or_contigset
def _write_fasta_or_contigset(file_name, make_faidx=False, n_records=251):
fasta_file = re.sub(".contigset.xml", ".fasta", file_name)
rec = [">chr%d\nacgtacgtacgt" % x for x in range(n_records)]
with open(fasta_file, "w") as f:
f.write("\n".join(rec))
f.flush()
if make_faidx:
pysam.faidx(fasta_file)
if file_name.endswith(".xml"):
cs = ContigSet(fasta_file, strict=make_faidx)
cs.write(file_name)
示例10: write_contigset_records
def write_contigset_records(pbcore_writer_class, records, file_name):
"""
Writes the Chunked fasta files and Writes a ContigSet.xml
Filename has contigset.xml
"""
fasta_file_name = ".".join(file_name.split(".")[:-2]) + ".fasta"
write_pbcore_records(pbcore_writer_class, records, fasta_file_name)
log.debug("Writing ContigSet XML to {f}".format(f=file_name))
ds = ContigSet(fasta_file_name)
ds.write(file_name)
示例11: test_contigset_consolidate_int_names
def test_contigset_consolidate_int_names(self):
#build set to merge
outdir = tempfile.mkdtemp(suffix="dataset-unittest")
inFas = os.path.join(outdir, 'infile.fasta')
outFas1 = os.path.join(outdir, 'tempfile1.fasta')
outFas2 = os.path.join(outdir, 'tempfile2.fasta')
# copy fasta reference to hide fai and ensure FastaReader is used
backticks('cp {i} {o}'.format(
i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
o=inFas))
rs1 = ContigSet(inFas)
double = 'B.cereus.1'
exp_double = rs1.get_contig(double)
# todo: modify the names first:
with FastaWriter(outFas1) as writer:
writer.writeRecord('5141', exp_double.sequence)
with FastaWriter(outFas2) as writer:
writer.writeRecord('5142', exp_double.sequence)
exp_double_seqs = [exp_double.sequence, exp_double.sequence]
exp_names = ['5141', '5142']
obs_file = ContigSet(outFas1, outFas2)
log.debug(obs_file.toExternalFiles())
obs_file.consolidate()
log.debug(obs_file.toExternalFiles())
# open obs and compare to exp
for name, seq in zip(exp_names, exp_double_seqs):
self.assertEqual(obs_file.get_contig(name).sequence[:], seq)
示例12: test_contigset_write
def test_contigset_write(self):
fasta = upstreamData.getLambdaFasta()
ds = ContigSet(fasta)
self.assertTrue(isinstance(ds.resourceReaders()[0],
IndexedFastaReader))
outdir = tempfile.mkdtemp(suffix="dataset-unittest")
outfn = os.path.join(outdir, 'test.fasta')
w = FastaWriter(outfn)
for rec in ds:
w.writeRecord(rec)
w.close()
fas = FastaReader(outfn)
for rec in fas:
# make sure a __repr__ didn't slip through:
self.assertFalse(rec.sequence.startswith('<'))
示例13: test_missing_fai_error_message
def test_missing_fai_error_message(self):
outdir = tempfile.mkdtemp(suffix="dataset-unittest")
inFas = os.path.join(outdir, 'infile.fasta')
# copy fasta reference to hide fai and ensure FastaReader is used
backticks('cp {i} {o}'.format(
i=ReferenceSet(data.getXml(9)).toExternalFiles()[0],
o=inFas))
rs1 = ContigSet(inFas)
with self.assertRaises(IOError) as cm:
rs1.assertIndexed()
self.assertEqual(
str(cm.exception),
( "Companion FASTA index (.fai) file not found or malformatted! "
"Use 'samtools faidx' to generate FASTA index."))
示例14: test_len_fastq
def test_len_fastq(self):
fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
'lambda/2590980/0008/Analysis_Results/'
'm141115_075238_ethan_c100699872550000001'
'823139203261572_s1_p0.1.subreads.fastq')
fq_out = tempfile.NamedTemporaryFile(suffix=".fastq").name
with open(fq_out, 'w') as fqh:
with open(fn, 'r') as fih:
for line in itertools.islice(fih, 24):
fqh.write(line)
cset = ContigSet(fq_out)
self.assertFalse(cset.isIndexed)
self.assertTrue(isinstance(cset.resourceReaders()[0], FastqReader))
self.assertEqual(sum(1 for _ in cset),
sum(1 for _ in FastqReader(fq_out)))
self.assertEqual(sum(1 for _ in cset), 6)
示例15: as_contigset
def as_contigset(fasta_file, xml_file):
if fasta_file == xml_file or xml_file is None:
if not op.isfile(fasta_file) or op.getsize(fasta_file) == 0:
return ContigSet()
return ContigSet(fasta_file)
file_size = op.getsize(fasta_file)
fai_file = fasta_file + ".fai"
if op.exists(fai_file):
os.remove(fai_file)
ds = ContigSet(fasta_file, generateIndices=True)
ds.write(xml_file)
if not file_size > 0:
with open(fai_file, "w") as fai:
fai.write("")
return ds