本文整理汇总了Python中Bio.SeqIO.SffIO.SffWriter类的典型用法代码示例。如果您正苦于以下问题：Python SffWriter类的具体用法？Python SffWriter怎么用？Python SffWriter使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了SffWriter类的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: sff_filter
def sff_filter(in_file, out_file, iterator_filter, inter):
    """Copy the SFF records selected by ``iterator_filter`` into ``out_file``.

    The Roche XML manifest of the input file is passed on to the writer;
    if reading it raises ``ValueError`` the output is written without one.

    Args:
        in_file: Path of the SFF file to read (opened in binary mode).
        out_file: Path of the SFF file to create (opened in binary mode).
        iterator_filter: Callable applied to the record stream. In
            interlaced mode it receives ``pair(SffIterator(...))`` and its
            output is flattened with ``itertools.chain.from_iterable``;
            otherwise it receives the ``SffIterator`` itself.
        inter: True to treat the input as interlaced pairs.

    Returns:
        The number of records written, or the number of pairs (an integer)
        when ``inter`` is true.

    Raises:
        AssertionError: In interlaced mode, if an odd number of records
            was written.
    """
    try:
        from Bio.SeqIO.SffIO import SffIterator, SffWriter
    except ImportError:
        # sys_exit is a helper defined elsewhere in the file
        # (presumably it terminates the program -- confirm).
        sys_exit("SFF filtering requires Biopython 1.54 or later")
    try:
        from Bio.SeqIO.SffIO import ReadRocheXmlManifest
    except ImportError:
        # Prior to Biopython 1.56 this was a private function
        from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
    with open(in_file, "rb") as in_handle:
        try:
            manifest = ReadRocheXmlManifest(in_handle)
        except ValueError:
            manifest = None
        with open(out_file, "wb") as out_handle:
            writer = SffWriter(out_handle, xml=manifest)
            in_handle.seek(0)  # start again after getting manifest
            if inter:
                from itertools import chain
                count = writer.write_file(
                    chain.from_iterable(iterator_filter(pair(SffIterator(in_handle))))
                )
                assert count % 2 == 0, "Odd number of records? %i" % count
                # Floor division keeps the pair count an int; "/=" would
                # return a float under true division.
                count //= 2
            else:
                count = writer.write_file(iterator_filter(SffIterator(in_handle)))
    return count
示例2: sff_filter
def sff_filter(in_file, pos_file, neg_file, wanted):
    """Split an SFF file by whether each read's cleaned ID is in ``wanted``.

    Args:
        in_file: Path of the SFF file to read.
        pos_file: Path for reads whose ``clean_name(rec.id)`` is in
            ``wanted``, or None to skip this output.
        neg_file: Path for reads whose ``clean_name(rec.id)`` is not in
            ``wanted``, or None to skip this output.
        wanted: Container of identifiers, tested with ``in``.

    Returns:
        Tuple ``(pos_count, neg_count)`` of the record counts reported by
        the writer; a count is 0 when its output file was not requested.
    """
    try:
        from Bio.SeqIO.SffIO import SffIterator, SffWriter
    except ImportError:
        sys.exit("SFF filtering requires Biopython 1.54 or later")
    try:
        from Bio.SeqIO.SffIO import ReadRocheXmlManifest
    except ImportError:
        # Prior to Biopython 1.56 this was a private function
        from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest

    pos_count = neg_count = 0
    # Context managers make sure every handle is closed even if parsing or
    # writing raises part-way through.
    with open(in_file, "rb") as in_handle:  # must be binary mode!
        try:
            manifest = ReadRocheXmlManifest(in_handle)
        except ValueError:
            manifest = None
        # This makes two passes through the SFF file, which isn't so
        # efficient, but this makes the code simple.
        if pos_file is not None:
            with open(pos_file, "wb") as out_handle:
                writer = SffWriter(out_handle, xml=manifest)
                in_handle.seek(0)  # start again after getting manifest
                pos_count = writer.write_file(
                    rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted
                )
        if neg_file is not None:
            with open(neg_file, "wb") as out_handle:
                writer = SffWriter(out_handle, xml=manifest)
                in_handle.seek(0)  # start again
                neg_count = writer.write_file(
                    rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted
                )
    # At the time of writing, Galaxy doesn't show SFF file read counts,
    # so the counts are returned for the caller to report.
    return pos_count, neg_count
示例3: test_no_index
def test_no_index(self):
    """Check that a file written with index=False parses and has no index."""
    # Does a lot of work to create a no-index SFF file
    # (in the process checking this bit of SffWriter works)
    records = list(SeqIO.parse(BytesIO(self.good), "sff"))
    with BytesIO() as handle:
        writer = SffWriter(handle, index=False)
        count = writer.write_file(records)
        self.assertEqual(count, len(records))
        # The rewritten data must parse back to the same record IDs.
        handle.seek(0)
        new = list(SeqIO.parse(handle, "sff"))
        self.assertEqual(len(records), len(new))
        for a, b in zip(records, new):
            self.assertEqual(a.id, b.id)
        # Looking for the index must now fail with the expected message.
        handle.seek(0)
        try:
            _sff_find_roche_index(handle)
        except ValueError as err:
            self.assertEqual(str(err), "No index present in this SFF file")
        else:
            self.fail("Test _sff_find_roche_index did not raise exception")
示例4: run
def run( self, proc_name = None ):
    """
    Write the reads yielded by reads_for_barcode to '<id_str>.sff'

    proc_name - label stored on the instance as self.proc_name

    If a ValueError is raised while writing, the output file is deleted
    """
    out_path = self.id_str + '.sff'
    try:
        with open( out_path, 'wb' ) as out_handle:
            self.proc_name = proc_name
            self.sff_file = SffWriter( out_handle )
            matching = self.reads_for_barcode( self.reads_sff )
            self.sff_file.write_file( matching )
            summary = "%s reads of %s matched %s" % (self._matched_reads, self._processed, self.id_str)
            logger.info( summary )
    except ValueError:
        # Treated as "no reads for this barcode": drop the temporary file
        os.unlink( out_path )
示例5: sff_filter
def sff_filter(in_file, out_file, iterator_filter):
    """Write the SFF records kept by ``iterator_filter`` to ``out_file``.

    The input file's Roche XML manifest is handed to the writer; when
    reading it raises ``ValueError`` the writer gets ``xml=None``.

    Args:
        in_file: Path of the SFF file to read.
        out_file: Path of the SFF file to create.
        iterator_filter: Callable that takes the ``SffIterator`` and
            returns the records to write.

    Returns:
        Whatever ``SffWriter.write_file`` returns for the filtered stream.
    """
    try:
        from Bio.SeqIO.SffIO import SffIterator, SffWriter
    except ImportError:
        stop_err("SFF filtering requires Biopython 1.54 or later")
    try:
        from Bio.SeqIO.SffIO import ReadRocheXmlManifest
    except ImportError:
        # Before Biopython 1.56 the manifest reader only had a private name
        from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
    with open(in_file, "rb") as source:
        try:
            xml_manifest = ReadRocheXmlManifest(source)
        except ValueError:
            xml_manifest = None
        source.seek(0)
        with open(out_file, "wb") as target:
            sff_out = SffWriter(target, xml=xml_manifest)
            # Rewind so the record iterator starts from the file header.
            source.seek(0)
            kept = iterator_filter(SffIterator(source))
            written = sff_out.write_file(kept)
    return written
示例6: test_write
def test_write(self):
    """Re-writing the parsed example file must reproduce it byte for byte."""
    sff_path = "Roche/E3MFGYR02_random_10_reads.sff"
    with open(sff_path, "rb") as source:
        manifest = ReadRocheXmlManifest(source)
    with open(sff_path, "rb") as source:
        reads = list(SffIterator(source))

    # First pass: give the writer a list of records.
    list_buffer = BytesIO()
    SffWriter(list_buffer, xml=manifest).write_file(reads)
    from_list = list_buffer.getvalue()

    # Second pass: give it a one-shot iterator; the bytes must not differ.
    iter_buffer = BytesIO()
    SffWriter(iter_buffer, xml=manifest).write_file(iter(reads))
    self.assertEqual(from_list, iter_buffer.getvalue())

    # Finally compare against the file on disk.
    with open(sff_path, "rb") as source:
        expected = source.read()
    self.assertEqual(len(from_list), len(expected))
    self.assertEqual(from_list, expected)
示例7: chr
# Script fragment: pads a hand-made index block and writes an SFF file
# whose index block is written right after the header, before the reads.
# NOTE(review): `index` and `padding` used in the first lines are defined
# above this excerpt; the original indentation was lost in extraction.
if padding:
padding = 8 - padding
index += chr(0) * padding
# The index block length must be a multiple of 8 bytes.
assert len(index) % 8 == 0
# Ugly bit of code to make a fake index at start
records = list(SffIterator(
open("Roche/E3MFGYR02_random_10_reads.sff", "rb")))
# NOTE(review): the output is opened in text mode "w" and `index` is a str
# built with chr(0); this looks Python 2 specific -- confirm before reuse.
out_handle = open(
"Roche/E3MFGYR02_alt_index_at_start.sff", "w")
index = ".diy1.00This is a fake index block (DIY = Do It Yourself), which is allowed under the SFF standard.\0"
# Pad the fake index with NUL characters up to a multiple of 8.
padding = len(index) % 8
if padding:
padding = 8 - padding
index += chr(0) * padding
w = SffWriter(out_handle, index=False, xml=None)
# Fake the header...
# (sets the writer's private fields by hand, taking key sequence and flow
# characters from the first record)
w._number_of_reads = len(records)
w._index_start = 0
w._index_length = 0
w._key_sequence = records[0].annotations["flow_key"]
w._flow_chars = records[0].annotations["flow_chars"]
w._number_of_flows_per_read = len(w._flow_chars)
# First header write uses a zero index start/length; the handle position
# afterwards is taken as the offset where the index block will go.
w.write_header()
w._index_start = out_handle.tell()
w._index_length = len(index)
# Rewind and overwrite the header now that the index offset is known.
out_handle.seek(0)
w.write_header() # this time with index info
# The index block goes directly after the header, then the reads.
w.handle.write(index)
for record in records:
w.write_record(record)
示例8: open
except ImportError:
sys.exit("Requires Biopython 1.54 or later")
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
#Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
in_handle = open(in_file, "rb") #must be binary mode!
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
out_handle = open(out_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) #start again after getting manifest
count = writer.write_file(rename_seqrecords(SffIterator(in_handle), rename))
out_handle.close()
in_handle.close()
else:
#Use Galaxy for FASTA, QUAL or FASTQ
if seq_format.lower() in ["fasta", "csfasta"] \
or seq_format.lower().startswith("qual"):
from galaxy_utils.sequence.fasta import fastaReader, fastaWriter
reader = fastaReader(open(in_file, "rU"))
writer = fastaWriter(open(out_file, "w"))
marker = ">"
elif seq_format.lower().startswith("fastq"):
from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
reader = fastqReader(open(in_file, "rU"))
示例9: open
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
#Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
in_handle = open(in_file, "rb") #must be binary mode!
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
in_handle.close()
out_handle = open(out_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
count = 0
#This does have the overhead of parsing into SeqRecord objects,
#but doing the header and index at the low level is too fiddly.
iterator = (records[name] for name in parse_ids(tabular_file, column))
try:
count = writer.write_file(iterator)
except KeyError, err:
out_handle.close()
if name not in records:
stop_err("Identifier %r not found in sequence file" % name)
else:
raise err
out_handle.close()
else:
#Avoid overhead of parsing into SeqRecord objects,
示例10: len
short_clipped += 1
elif keep_negatives:
if len(seq) >= min_len:
negs += 1
yield record
else:
short_neg += 1
in_handle = open(in_file, "rb")
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
in_handle.seek(0)
out_handle = open(out_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
writer.write_file(process(SffIterator(in_handle)))
#End of SFF code
elif seq_format.lower().startswith("fastq"):
in_handle = open(in_file, "rU")
out_handle = open(out_file, "w")
reader = fastqReader(in_handle)
writer = fastqWriter(out_handle)
if forward:
for record in reader:
seq = record.sequence.upper()
result = primer.search(seq)
if result:
#Forward primer, take everything after it
cut = result.end()
record.sequence = seq[cut:]
示例11: open
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
#Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
in_handle = open(in_file, "rb") #must be binary mode!
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
#This makes two passes through the SFF file, which isn't so efficient,
#but this makes the code simple.
pos_count = neg_count = 0
if out_positive_file is not None:
out_handle = open(out_positive_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) #start again after getting manifest
pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in ids)
out_handle.close()
if out_negative_file is not None:
out_handle = open(out_negative_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) #start again
neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in ids)
out_handle.close()
#And we're done
in_handle.close()
#At the time of writing, Galaxy doesn't show SFF file read counts,
#so it is useful to put them in stdout and thus shown in job info.
print "%i with and %i without specified IDs" % (pos_count, neg_count)
elif seq_format.lower()=="fasta":
示例12: main
def main():
# Parse Command Line
try:
tabular_file, cols_arg, in_file, seq_format, out_positive_file, out_negative_file = sys.argv[1:]
except ValueError:
stop_err("Expected six arguments, got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv)))
try:
columns = [int(arg) - 1 for arg in cols_arg.split(",")]
except ValueError:
stop_err("Expected list of columns (comma separated integers), got %s" % cols_arg)
if out_positive_file == "-" and out_negative_file == "-":
stop_err("Neither output file requested")
# Read tabular file and record all specified identifiers
ids = set()
handle = open(tabular_file, "rU")
if len(columns) > 1:
# General case of many columns
for line in handle:
if line.startswith("#"):
# Ignore comments
continue
parts = line.rstrip("\n").split("\t")
for col in columns:
ids.add(parts[col])
print "Using %i IDs from %i columns of tabular file" % (len(ids), len(columns))
else:
# Single column, special case speed up
col = columns[0]
for line in handle:
if not line.startswith("#"):
ids.add(line.rstrip("\n").split("\t")[col])
print "Using %i IDs from tabular file" % (len(ids))
handle.close()
if seq_format.lower() == "sff":
# Now write filtered SFF file based on IDs from BLAST file
try:
from Bio.SeqIO.SffIO import SffIterator, SffWriter
except ImportError:
stop_err("Requires Biopython 1.54 or later")
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
# Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
in_handle = open(in_file, "rb") # must be binary mode!
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
# This makes two passes through the SFF file, which isn't so efficient,
# but this makes the code simple.
if out_positive_file != "-":
out_handle = open(out_positive_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) # start again after getting manifest
pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id in ids)
out_handle.close()
if out_negative_file != "-":
out_handle = open(out_negative_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) # start again
neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id not in ids)
out_handle.close()
# And we're done
in_handle.close()
# At the time of writing, Galaxy doesn't show SFF file read counts,
# so it is useful to put them in stdout and thus shown in job info.
if out_positive_file != "-" and out_negative_file != "-":
print "%i with and %i without specified IDs" % (pos_count, neg_count)
elif out_positive_file != "-":
print "%i with specified IDs" % pos_count
elif out_negative_file != "-":
print "%i without specified IDs" % neg_count
elif seq_format.lower() == "fasta":
# Write filtered FASTA file based on IDs from tabular file
reader = fastaReader(open(in_file, "rU"))
if out_positive_file != "-" and out_negative_file != "-":
print "Generating two FASTA files"
positive_writer = fastaWriter(open(out_positive_file, "w"))
negative_writer = fastaWriter(open(out_negative_file, "w"))
for record in reader:
# The [1:] is because the fastaReader leaves the > on the identifier.
if record.identifier and record.identifier.split()[0][1:] in ids:
positive_writer.write(record)
else:
negative_writer.write(record)
positive_writer.close()
negative_writer.close()
elif out_positive_file != "-":
print "Generating matching FASTA file"
positive_writer = fastaWriter(open(out_positive_file, "w"))
for record in reader:
# The [1:] is because the fastaReader leaves the > on the identifier.
if record.identifier and record.identifier.split()[0][1:] in ids:
positive_writer.write(record)
positive_writer.close()
#.........这里部分代码省略.........
示例13: PGMBarcode
class PGMBarcode( object ):
    """
    One IonTorrent barcode together with the counters and output
    writer used while pulling its reads out of an sff file
    """

    def __init__( self, *args, **kwargs ):
        """
        Every setting is taken from keyword arguments; positional
        arguments are accepted but not used.

        kwargs - id_str, type, sequence, floworder, index, annotation,
                 adapter, score_mode, score_cutoff, sfffilepath, max_num
        """
        # These keywords are stored under their own names
        for key in ( 'id_str', 'type', 'sequence', 'floworder', 'index',
                     'annotation', 'adapter', 'score_mode', 'score_cutoff' ):
            setattr( self, key, kwargs[key] )
        # Filled in by run()
        self.sff_file = None
        self.proc_name = None
        self.reads_sff = kwargs['sfffilepath']
        self.max_num = kwargs['max_num']
        # Running totals maintained by reads_for_barcode()
        self._processed = 0
        self._matched_reads = 0

    def _readMatches( self, read ):
        """
        read - record for a read from the sff file

        True when the read's barcode equals this barcode's sequence,
        compared in lower case
        """
        expected = self.sequence.lower()
        found = self._getReadBarcode( read )
        return expected == found

    def _getReadBarcode( self, read ):
        """
        Lower-cased slice of the read that lies after the flow_key and
        ends len(adapter) bases before clip_adapter_left
        """
        notes = read.annotations
        first = len( notes['flow_key'] )
        last = notes['clip_adapter_left'] - len( self.adapter )
        return str( read.seq )[first:last].lower()

    def reads_for_barcode( self, reads_file ):
        """
        Generator over the reads in reads_file that match this barcode.

        Stops once max_num reads have been processed, unless max_num
        is 'All'
        """
        for candidate in SeqIO.parse( reads_file, 'sff' ):
            if self.max_num != 'All':
                if self._processed == self.max_num:
                    # Limit reached, nothing more to yield
                    return
            if self._readMatches( candidate ):
                logger.debug( "%s: %s Matched Read %s" % (self.proc_name, self.id_str, candidate.id) )
                self._matched_reads += 1
                yield candidate
            self._processed += 1

    def run( self, proc_name = None ):
        """
        Write the matching reads to '<id_str>.sff'.

        proc_name - label stored on the instance as self.proc_name

        If a ValueError is raised while writing, the output file is deleted
        """
        out_path = self.id_str + '.sff'
        try:
            with open( out_path, 'wb' ) as out_handle:
                self.proc_name = proc_name
                self.sff_file = SffWriter( out_handle )
                matching = self.reads_for_barcode( self.reads_sff )
                self.sff_file.write_file( matching )
                summary = "%s reads of %s matched %s" % (self._matched_reads, self._processed, self.id_str)
                logger.info( summary )
        except ValueError:
            # Treated as "no reads for this barcode": drop the temporary file
            os.unlink( out_path )
示例14: open
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
#Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
in_handle = open(in_file, "rb") #must be binary mode!
try:
manifest = ReadRocheXmlManifest(in_handle)
except ValueError:
manifest = None
#This makes two passes through the SFF file, which isn't so efficient,
#but this makes the code simple.
pos_count = neg_count = 0
if out_positive_file != "-":
out_handle = open(out_positive_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) #start again after getting manifest
pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id in ids)
out_handle.close()
if out_negative_file != "-":
out_handle = open(out_negative_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
in_handle.seek(0) #start again
neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if rec.id not in ids)
out_handle.close()
#And we're done
in_handle.close()
#At the time of writing, Galaxy doesn't show SFF file read counts,
#so it is useful to put them in stdout and thus shown in job info.
print "%i with and %i without specified IDs" % (pos_count, neg_count)
elif seq_format.lower()=="fasta":