本文整理汇总了Python中pbcore.io.ContigSet.consolidate方法的典型用法代码示例。如果您正苦于以下问题:Python ContigSet.consolidate方法的具体用法?Python ContigSet.consolidate怎么用?Python ContigSet.consolidate使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pbcore.io.ContigSet的用法示例。
在下文中一共展示了ContigSet.consolidate方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_contigset_consolidate_int_names
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def test_contigset_consolidate_int_names(self):
    """Consolidate two single-record FASTA files whose contig names are
    all digits, then check that each contig is retrievable by name."""
    # Stage every input inside one scratch directory.
    workdir = tempfile.mkdtemp(suffix="dataset-unittest")
    source_fasta = os.path.join(workdir, 'infile.fasta')
    part_paths = [os.path.join(workdir, 'tempfile1.fasta'),
                  os.path.join(workdir, 'tempfile2.fasta')]
    # copy fasta reference to hide fai and ensure FastaReader is used
    reference_path = ReferenceSet(data.getXml(9)).toExternalFiles()[0]
    backticks('cp {i} {o}'.format(i=reference_path, o=source_fasta))
    source_set = ContigSet(source_fasta)
    template = source_set.get_contig('B.cereus.1')
    # The same sequence is written twice, once under each numeric name.
    numeric_names = ['5141', '5142']
    for path, name in zip(part_paths, numeric_names):
        with FastaWriter(path) as writer:
            writer.writeRecord(name, template.sequence)
    expected_seqs = [template.sequence, template.sequence]
    merged = ContigSet(*part_paths)
    log.debug(merged.toExternalFiles())
    merged.consolidate()
    log.debug(merged.toExternalFiles())
    # Look each contig up by name in the consolidated set and compare.
    for name, seq in zip(numeric_names, expected_seqs):
        self.assertEqual(merged.get_contig(name).sequence[:], seq)
示例2: test_contigset_consolidate_genomic_consensus
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def test_contigset_consolidate_genomic_consensus(self):
    """
    Verify that the contigs output by GenomicConsensus (e.g. quiver) can
    be consolidated.

    Each of the three windowed headers is written to its own FASTA file
    once per header suffix.  For every suffix, the three matching files
    are loaded into one ContigSet, consolidated, written out, and the
    reloaded dataset is expected to contain exactly one record.
    """
    FASTA1 = ("lambda_NEB3011_0_60",
              "GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCG")
    FASTA2 = ("lambda_NEB3011_120_180",
              "CACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTG")
    FASTA3 = ("lambda_NEB3011_60_120",
              "GTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGT")
    suffixes = ["", "|quiver", "|plurality", "|arrow", "|poa"]
    # files[h][s] is the path holding header h written with suffix s.
    files = []
    for header, seq in [FASTA1, FASTA2, FASTA3]:
        _files = []
        for suffix in suffixes:
            # NOTE(review): only the generated name is kept; the temporary
            # file object itself is discarded and the path reopened below.
            tmpfile = tempfile.NamedTemporaryFile(suffix=".fasta").name
            with open(tmpfile, "w") as f:
                f.write(">{h}{s}\n{q}".format(h=header, s=suffix, q=seq))
            _files.append(tmpfile)
        files.append(_files)
    # Index i selects a suffix, so iterate over all suffixes.  The previous
    # range(3) matched the number of headers and silently skipped the
    # "|arrow" and "|poa" cases.
    for i in range(len(suffixes)):
        ds = ContigSet(*[f[i] for f in files])
        out1 = tempfile.NamedTemporaryFile(suffix=".contigset.xml").name
        fa1 = tempfile.NamedTemporaryFile(suffix=".fasta").name
        ds.consolidate(fa1)
        ds.write(out1)
        with ContigSet(out1) as ds_new:
            self.assertEqual(len([rec for rec in ds_new]), 1,
                             "failed on %d" % i)
示例3: __gather_contigset
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def __gather_contigset(resource_file_extension, input_files, output_file,
                       new_resource_file=None,
                       skip_empty=True):
    """
    Merge several contig files into one ContigSet, consolidate it into a
    single resource file and write the dataset to ``output_file``.

    :param resource_file_extension: extension substituted for the trailing
                                    ``xml`` of ``output_file`` when no
                                    ``new_resource_file`` is supplied
    :param input_files: List of file paths
    :param output_file: File Path
    :param new_resource_file: the path of the file to which the other contig
                              files are consolidated
    :param skip_empty: when true, drop inputs whose ContigSet reports no
                       external files
    :return: Output file
    :rtype: str
    """
    if skip_empty:
        # Keep only inputs that actually reference at least one file.
        input_files = [path for path in input_files
                       if len(ContigSet(path).toExternalFiles()) > 0]
    merged = ContigSet(*input_files)
    # Derive the resource path from the XML path only when none was given;
    # otherwise whatever was passed (possibly None) goes to consolidate().
    if not new_resource_file and output_file.endswith('xml'):
        new_resource_file = output_file[:-3] + resource_file_extension
    merged.consolidate(new_resource_file)
    merged.newUuid()
    merged.write(output_file)
    return output_file
示例4: test_contigset_consolidate
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def test_contigset_consolidate(self):
    """Split one contig across two FASTA files as ``_0_10`` / ``_10_20``
    windows next to two untouched contigs, consolidate, and check that
    the windows are joined in order while the singletons are unchanged."""
    # Scratch area and file names for the inputs.
    workdir = tempfile.mkdtemp(suffix="dataset-unittest")
    source_fasta = os.path.join(workdir, 'infile.fasta')
    first_part = os.path.join(workdir, 'tempfile1.fasta')
    second_part = os.path.join(workdir, 'tempfile2.fasta')
    # copy fasta reference to hide fai and ensure FastaReader is used
    reference_path = ReferenceSet(data.getXml(9)).toExternalFiles()[0]
    backticks('cp {i} {o}'.format(i=reference_path, o=source_fasta))
    source_set = ContigSet(source_fasta)
    singletons = ['A.baumannii.1', 'A.odontolyticus.1']
    double = 'B.cereus.1'
    # The reader handle is not used afterwards; the call is retained as-is.
    reader = source_set.resourceReaders()[0]
    split_contig = source_set.get_contig(double)
    single_contigs = [source_set.get_contig(name) for name in singletons]
    # First file: one singleton plus the later window of the split contig.
    with FastaWriter(first_part) as writer:
        writer.writeRecord(single_contigs[0])
        writer.writeRecord(split_contig.name + '_10_20',
                           split_contig.sequence)
    # Second file: the earlier window (with extra bases) plus a singleton.
    with FastaWriter(second_part) as writer:
        writer.writeRecord(split_contig.name + '_0_10',
                           split_contig.sequence + 'ATCGATCGATCG')
        writer.writeRecord(single_contigs[1])
    # Expected joined sequence: the _0_10 window followed by the _10_20 one.
    expected_joined = (split_contig.sequence
                       + 'ATCGATCGATCG'
                       + split_contig.sequence)
    expected_singles = [rec.sequence for rec in single_contigs]
    merged = ContigSet(first_part, second_part)
    merged.induceIndices()
    log.debug(merged.toExternalFiles())
    # Four records before consolidation...
    self.assertEqual(len(merged), 4)
    self.assertEqual(len(list(merged)), 4)
    merged.consolidate()
    log.debug(merged.toExternalFiles())
    # ...and after it, the contents must match the expectations.
    for name, seq in zip(singletons, expected_singles):
        self.assertEqual(merged.get_contig(name).sequence[:], seq)
    self.assertEqual(merged.get_contig(double).sequence[:],
                     expected_joined)
    self.assertEqual(len(merged._openReaders), 1)
    self.assertEqual(len(merged.index), 3)
    self.assertEqual(len(merged._indexMap), 3)
    self.assertEqual(len(merged), 3)
    self.assertEqual(len(list(merged)), 3)
    # test merge:
    left = ContigSet(first_part)
    right = ContigSet(second_part)
    combined = left + right
示例5: gather_contigset
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def gather_contigset(input_files, output_file, new_resource_file=None,
                     skip_empty=True):
    """
    Combine contig files into one ContigSet, consolidate it and write the
    resulting dataset to ``output_file``.

    :param input_files: List of file paths
    :param output_file: File Path
    :param new_resource_file: the path of the file to which the other contig
                              files are consolidated
    :param skip_empty: Ignore empty files (accepted but not used here)
    :return: Output file
    :rtype: str
    """
    merged = ContigSet(*input_files)
    # With no explicit resource path, reuse the XML path with its trailing
    # 'xml' swapped for 'fasta'; non-XML outputs leave the value untouched.
    if not new_resource_file and output_file.endswith('xml'):
        new_resource_file = output_file[:-3] + 'fasta'
    merged.consolidate(new_resource_file)
    merged.write(output_file)
    return output_file
示例6: test_fastq_consolidate
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def test_fastq_consolidate(self):
    """Consolidate a length-filtered FASTQ ContigSet and check that the
    consolidated file holds the same number of records as the set."""
    source_fastq = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
                    'lambda/2590980/0008/Analysis_Results/'
                    'm141115_075238_ethan_c100699872550000001'
                    '823139203261572_s1_p0.1.subreads.fastq')
    subset_path = tempfile.NamedTemporaryFile(suffix=".fastq").name
    consolidated_path = tempfile.NamedTemporaryFile(suffix=".fastq").name

    def count(records):
        # Count by iterating, without relying on len() support.
        return sum(1 for _ in records)

    # Copy the first 240 lines of the source into the working file.
    with open(subset_path, 'w') as dest:
        with open(source_fastq, 'r') as src:
            dest.writelines(itertools.islice(src, 240))
    cset = ContigSet(subset_path)
    self.assertEqual(count(cset), 60)
    # Restrict the set to records longer than 1000.
    cset.filters.addRequirement(length=[('>', 1000)])
    self.assertEqual(count(cset), 23)
    cset.consolidate(consolidated_path)
    remaining = count(cset)
    consolidated = FastqReader(consolidated_path)
    self.assertEqual(remaining, 23)
    self.assertEqual(remaining, count(consolidated))
示例7: test_empty_fastq_consolidate
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def test_empty_fastq_consolidate(self):
    """Consolidate pairs of FASTQ files where either, both, or neither
    input is empty, and check the record counts before and after.

    The four scenarios share one procedure, so they are driven from a
    table instead of being repeated inline.  Empty inputs are created
    without opening the source FASTQ, which was previously opened and
    never read.
    """
    fn = ('/pbi/dept/secondary/siv/testdata/SA3-RS/'
          'lambda/2590980/0008/Analysis_Results/'
          'm141115_075238_ethan_c100699872550000001'
          '823139203261572_s1_p0.1.subreads.fastq')
    fq1_out = tempfile.NamedTemporaryFile(suffix="1.fastq").name
    fq2_out = tempfile.NamedTemporaryFile(suffix="2.fastq").name
    cfq_out = tempfile.NamedTemporaryFile(suffix=".fastq").name

    def _fill(dest, line_range):
        """Write lines [start, stop) of the source to dest; a range of
        None leaves dest as an empty file."""
        with open(dest, 'w') as fqh:
            if line_range is None:
                return
            start, stop = line_range
            with open(fn, 'r') as fih:
                fqh.writelines(itertools.islice(fih, start, stop))

    def _check_consolidation(expected, label):
        """Load both inputs, consolidate into cfq_out, and assert the
        record count before, after, and in the consolidated file."""
        cset = ContigSet(fq1_out, fq2_out)
        cset_l = sum(1 for _ in cset)
        self.assertEqual(cset_l, expected, label)
        cset.consolidate(cfq_out)
        cset_l = sum(1 for _ in cset)
        cfq = FastqReader(cfq_out)
        self.assertEqual(cset_l, expected, label)
        self.assertEqual(cset_l, sum(1 for _ in cfq), label)

    # (label, line range for file 1, line range for file 2, record count)
    scenarios = [
        ("two full", (0, 240), (240, 480), 120),
        ("one full one empty", (0, 240), None, 60),
        ("one empty one full", None, (0, 240), 60),
        ("both empty", None, None, 0),
    ]
    for label, first_range, second_range, expected in scenarios:
        _fill(fq1_out, first_range)
        _fill(fq2_out, second_range)
        _check_consolidation(expected, label)
示例8: consolidateXml
# 需要导入模块: from pbcore.io import ContigSet [as 别名]
# 或者: from pbcore.io.ContigSet import consolidate [as 别名]
def consolidateXml(args):
    """Load the ContigSet named by ``args.infile``, consolidate it into
    ``args.datafile`` and write the resulting dataset to ``args.xmlfile``.

    NOTE(review): the earlier description spoke of combining BAMs and
    applying XML filters, but this code constructs a ContigSet; confirm
    which dataset type is intended.
    """
    contigs = ContigSet(args.infile)
    contigs.consolidate(args.datafile)
    contigs.write(args.xmlfile)