本文整理汇总了 Python 中 pyfastaq.sequences.file_reader 函数的典型用法代码示例。如果您正苦于以下问题：Python file_reader 函数的具体用法？Python file_reader 怎么用？Python file_reader 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 file_reader 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: run
def run(description):
    '''Command-line entry point for "fastaq to_random_subset".

    Parses the command line, then keeps each read of the input file with the
    requested per cent probability.  When --mate_file is given, one record is
    pulled from the mates file for every read of the input file, and a kept
    read is immediately followed by its mate in the output (interleaved).

    description -- accepted for the caller's convenience; the body below does
                   not use it (the parser description is hard-coded).

    Exits with status 1 if the mates file runs out before the input file.
    '''
    parser = argparse.ArgumentParser(
        description = 'Takes a random subset of reads from a sequence file and optionally the corresponding read ' +
            'from a mates file. Output is interleaved if mates file given',
        usage = 'fastaq to_random_subset [options] <infile> <outfile> <percent>')
    parser.add_argument('--mate_file', help='Name of mates file')
    parser.add_argument('--seed', help='Seed for random number generator. If not given, python\'s default is used', metavar='INT')
    parser.add_argument('infile', help='Name of input file')
    parser.add_argument('outfile', help='Name of output file')
    parser.add_argument('percent', type=float, help='Per cent probability of keeping any given read (pair) in [0,100]', metavar='FLOAT')
    options = parser.parse_args()

    # The seed is passed through exactly as typed on the command line
    # (a string, or None when --seed is absent).
    random.seed(a=options.seed)
    seq_reader = sequences.file_reader(options.infile)
    fout = utils.open_file_write(options.outfile)

    if options.mate_file:
        mate_seq_reader = sequences.file_reader(options.mate_file)

    # try/finally so the output handle is closed on every exit path,
    # including the sys.exit(1) below (SystemExit still propagates).
    try:
        for seq in seq_reader:
            if options.mate_file:
                # The mate is consumed even when the pair is not kept, so the
                # two files stay in step.
                try:
                    mate_seq = next(mate_seq_reader)
                except StopIteration:
                    print('Error! Didn\'t get mate for read', seq.id, file=sys.stderr)
                    sys.exit(1)

            if 100 * random.random() <= options.percent:
                print(seq, file=fout)
                if options.mate_file:
                    print(mate_seq, file=fout)
    finally:
        utils.close(fout)
示例2: test_file_reader_gff
def test_file_reader_gff(self):
    '''file_reader should parse sequences from good GFF files and reject bad ones'''
    good_names = (
        'sequences_test_gffv3.gff',
        'sequences_test_gffv3.no_FASTA_line.gff',
    )
    bad_names = (
        'sequences_test_gffv3.no_seq.gff',
        'sequences_test_gffv3.no_seq.2.gff',
    )

    # Every record of a good file must be named seq1, seq2, ... and carry the
    # same ten bases.
    for name in good_names:
        records = sequences.file_reader(os.path.join(data_dir, name))
        for number, record in enumerate(records, start=1):
            expected = sequences.Fasta('seq' + str(number), 'ACGTACGTAC')
            self.assertEqual(record, expected)

    # Creating the reader for, or iterating over, a bad file must raise
    # sequences.Error.
    for name in bad_names:
        with self.assertRaises(sequences.Error):
            records = sequences.file_reader(os.path.join(data_dir, name))
            for record in records:
                pass
示例3: interleave
def interleave(infile_1, infile_2, outfile):
    '''Write the records of infile_1 and infile_2 to outfile, alternating.

    For every sequence read from infile_1, the next sequence is read from
    infile_2 and the two are written one after the other.

    Raises Error if a mate cannot be read from infile_2 for some sequence of
    infile_1, or if infile_2 still has a sequence left once infile_1 is
    exhausted.  The output file is closed on every exit path.
    '''
    seq_reader_1 = sequences.file_reader(infile_1)
    seq_reader_2 = sequences.file_reader(infile_2)
    f_out = utils.open_file_write(outfile)

    try:
        for seq_1 in seq_reader_1:
            # "except Exception" rather than a bare except: any ordinary
            # failure to fetch the mate is still reported as Error, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            try:
                seq_2 = next(seq_reader_2)
            except Exception:
                raise Error('Error getting mate for sequence', seq_1.id, ' ... cannot continue')

            print(seq_1, file=f_out)
            print(seq_2, file=f_out)

        # infile_1 is exhausted; infile_2 must be exhausted as well.
        try:
            seq_2 = next(seq_reader_2)
        except Exception:
            seq_2 = None

        if seq_2 is not None:
            raise Error('Error getting mate for sequence', seq_2.id, ' ... cannot continue')
    finally:
        utils.close(f_out)
示例4: interleave
def interleave(infile_1, infile_2, outfile, suffix1=None, suffix2=None):
    '''Makes interleaved file from two sequence files.

    For every sequence read from infile_1, the next sequence is read from
    infile_2 and the two are written to outfile one after the other.

    If suffix1 is not None, it is appended to the name of every sequence from
    infile_1, unless the name already ends with suffix1.  Similar for suffix2
    and infile_2.

    Raises Error if a mate cannot be read from infile_2 for some sequence of
    infile_1, or if infile_2 still has a sequence left once infile_1 is
    exhausted.  The output file is closed on every exit path.
    '''
    seq_reader_1 = sequences.file_reader(infile_1)
    seq_reader_2 = sequences.file_reader(infile_2)
    f_out = utils.open_file_write(outfile)

    try:
        for seq_1 in seq_reader_1:
            # "except Exception" rather than a bare except: any ordinary
            # failure to fetch the mate is still reported as Error, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            try:
                seq_2 = next(seq_reader_2)
            except Exception:
                raise Error('Error getting mate for sequence', seq_1.id, ' ... cannot continue')

            if suffix1 is not None and not seq_1.id.endswith(suffix1):
                seq_1.id += suffix1
            if suffix2 is not None and not seq_2.id.endswith(suffix2):
                seq_2.id += suffix2

            print(seq_1, file=f_out)
            print(seq_2, file=f_out)

        # infile_1 is exhausted; infile_2 must be exhausted as well.
        try:
            seq_2 = next(seq_reader_2)
        except Exception:
            seq_2 = None

        if seq_2 is not None:
            raise Error('Error getting mate for sequence', seq_2.id, ' ... cannot continue')
    finally:
        utils.close(f_out)
示例5: filter
def filter(
    infile,
    outfile,
    minlength=0,
    maxlength=float('inf'),
    regex=None,
    ids_file=None,
    invert=False,
    mate_in=None,
    mate_out=None,
    both_mates_pass=True,
):
    '''Copy to outfile the sequences of infile that satisfy the given criteria.

    A sequence passes when all of the following hold:
      * minlength <= len(seq) <= maxlength
      * regex is None, or regex matches somewhere in the sequence name
      * ids_file is None, or the sequence name is a line of ids_file
        (each line is compared after stripping trailing whitespace)

    invert          -- if True, write the sequences (or pairs) that would
                       otherwise have been dropped.
    mate_in         -- optional second input file, read in step with infile.
    mate_out        -- output file for mates; required when mate_in is given.
    both_mates_pass -- with mates: if True a pair is wanted only when both
                       reads pass; if False it is enough that either passes.

    Raises Error if mate_in is given without mate_out, or if a mate cannot be
    read for some sequence of infile.  Every output file that was opened is
    closed on every exit path.
    '''
    ids_from_file = set()
    if ids_file is not None:
        f = utils.open_file_read(ids_file)
        try:
            for line in f:
                ids_from_file.add(line.rstrip())
        finally:
            utils.close(f)

    if mate_in and mate_out is None:
        raise Error('Error in filter! mate_in provided. Must also provide mate_out')

    # Output handles are registered here as they are opened, so the finally
    # clause closes exactly those that exist (the mate handle used to be left
    # open when reading a mate failed).
    open_handles = []

    try:
        if mate_in:
            seq_reader_mate = sequences.file_reader(mate_in)
            f_out_mate = utils.open_file_write(mate_out)
            open_handles.append(f_out_mate)

        seq_reader = sequences.file_reader(infile)
        f_out = utils.open_file_write(outfile)
        open_handles.append(f_out)

        name_pattern = re.compile(regex) if regex is not None else None

        def passes(seq):
            return minlength <= len(seq) <= maxlength \
                and (name_pattern is None or name_pattern.search(seq.id) is not None) \
                and (ids_file is None or seq.id in ids_from_file)

        for seq in seq_reader:
            seq_passes = passes(seq)

            if mate_in:
                # "except Exception" rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not turned into Error.
                try:
                    seq_mate = next(seq_reader_mate)
                except Exception:
                    raise Error('Error getting mate for sequence', seq.id, ' ... cannot continue')

                mate_passes = passes(seq_mate)
                want_the_pair = (seq_passes and mate_passes) \
                    or ((seq_passes or mate_passes) and not both_mates_pass)

                # "!=" acts as exclusive-or: invert flips the decision.
                if want_the_pair != invert:
                    print(seq, file=f_out)
                    print(seq_mate, file=f_out_mate)
            elif seq_passes != invert:
                print(seq, file=f_out)
    finally:
        # Same order as before: main output first, then the mates output.
        for handle in reversed(open_handles):
            utils.close(handle)
示例6: fasta_to_fastq
def fasta_to_fastq(fasta_in, qual_in, outfile):
    '''Combine a FASTA file and a quality file into one output file.

    The two inputs are read in step.  For each pair of records the names must
    be identical; the quality record's whitespace-separated values are
    converted to ints and handed to the sequence's to_Fastq() method, whose
    result is written to outfile.

    Raises Error if the names of a pair differ, or if qual_in ends before
    fasta_in does.  Previously a short qual file let a bare StopIteration
    escape and left the output file open.  The output file is now closed on
    every exit path.
    '''
    fa_reader = sequences.file_reader(fasta_in)
    qual_reader = sequences.file_reader(qual_in, read_quals=True)
    f_out = utils.open_file_write(outfile)

    try:
        for seq in fa_reader:
            try:
                qual = next(qual_reader)
            except StopIteration:
                raise Error('No quality scores found for sequence', seq.id)

            if seq.id != qual.id:
                raise Error('Mismatch in names from fasta and qual file', seq.id, qual.id)

            scores = [int(x) for x in qual.seq.split()]
            print(seq.to_Fastq(scores), file=f_out)
    finally:
        utils.close(f_out)
示例7: acgtn_only
def acgtn_only(infile, outfile):
    '''Copy infile to outfile, calling replace_non_acgt() on every sequence first.

    Per the original description: every non-acgtn character (case
    insensitive) is replaced with an N.
    '''
    out_handle = utils.open_file_write(outfile)
    records = sequences.file_reader(infile)

    for record in records:
        record.replace_non_acgt()
        print(record, file=out_handle)

    utils.close(out_handle)
示例8: count_sequences
def count_sequences(infile):
    '''Return how many sequences file_reader yields for infile'''
    return sum(1 for _ in sequences.file_reader(infile))
示例9: trim_contigs
def trim_contigs(infile, outfile, trim):
    '''Trim both ends of every sequence and widen every gap, writing what is left.

    For each sequence of infile:
      * sequences shorter than 2 * trim are skipped;
      * every gap reported by seq.gaps() is extended with Ns by up to `trim`
        positions on each side, clamped to the sequence ends;
      * `trim` bases are removed from each end via seq.trim(), then
        seq.trim_Ns() is called;
      * the result is written to outfile only if it still contains at least
        one character other than n/N.

    The output file is closed on every exit path.
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)

    # Loop-invariant, so compiled once instead of once per sequence.
    non_n_base = re.compile('[^nN]')

    try:
        for seq in seq_reader:
            if len(seq) < 2 * trim:
                continue

            gaps = seq.gaps()
            bases = list(seq.seq)

            # extend the length of each gap
            for gap in gaps:
                left_start = max(gap.start - trim, 0)
                right_end = min(gap.end + trim + 1, len(seq))

                for i in range(left_start, gap.start):
                    bases[i] = 'N'

                for i in range(gap.end, right_end):
                    bases[i] = 'N'

            seq.seq = ''.join(bases)

            # trim start/end bases and tidy up any resulting Ns at either end of the trimmed seq
            seq.trim(trim, trim)
            seq.trim_Ns()

            # check that there is some non-N sequence left over
            if non_n_base.search(seq.seq) is not None:
                print(seq, file=fout)
    finally:
        utils.close(fout)
示例10: test_file_reader_fasta
def test_file_reader_fasta(self):
    '''Each record of sequences_test.fa should be named 1, 2, ... with sequence ACGTA'''
    fasta_path = os.path.join(data_dir, 'sequences_test.fa')
    records = sequences.file_reader(fasta_path)

    for number, record in enumerate(records, start=1):
        expected = sequences.Fasta(str(number), 'ACGTA')
        self.assertEqual(record, expected)
示例11: to_fasta
def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False, check_unique=False):
    '''Write every sequence of infile to outfile as FASTA.

    line_length                  -- temporarily assigned to the class
                                    attribute sequences.Fasta.line_length
                                    while writing; the previous value is
                                    restored afterwards.
    strip_after_first_whitespace -- if True, call strip_after_first_whitespace()
                                    on each sequence before writing it.
    check_unique                 -- if True, count how often each name occurs;
                                    after writing, report every repeated name
                                    on stderr and raise Error if there was one.

    Fastq records are converted to Fasta (name and bases only) before being
    written; anything else is written as-is.

    The output file is closed and sequences.Fasta.line_length is restored even
    when reading or writing fails.  Previously an exception left the modified
    class attribute in place for the rest of the process.
    '''
    seq_reader = sequences.file_reader(infile)
    f_out = utils.open_file_write(outfile)
    original_line_length = sequences.Fasta.line_length
    sequences.Fasta.line_length = line_length
    used_names = {}

    try:
        for seq in seq_reader:
            if strip_after_first_whitespace:
                seq.strip_after_first_whitespace()

            if check_unique:
                used_names[seq.id] = used_names.get(seq.id, 0) + 1

            if isinstance(seq, sequences.Fastq):
                print(sequences.Fasta(seq.id, seq.seq), file=f_out)
            else:
                print(seq, file=f_out)
    finally:
        # Nested so the class attribute is restored even if closing fails.
        try:
            utils.close(f_out)
        finally:
            sequences.Fasta.line_length = original_line_length

    if check_unique:
        all_unique = True

        for name, count in used_names.items():
            if count > 1:
                print('Sequence name "' + name + '" not unique. Found', count, 'times', file=sys.stderr)
                all_unique = False

        if not all_unique:
            raise Error('Not all sequence names unique. Cannot continue')
示例12: translate
def translate(infile, outfile, frame=0):
    '''Write to outfile the result of seq.translate(frame=frame) for each sequence of infile'''
    records = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in records:
        translated = record.translate(frame=frame)
        print(translated, file=out_handle)

    utils.close(out_handle)
示例13: reverse_complement
def reverse_complement(infile, outfile):
    '''Copy infile to outfile, calling revcomp() on every sequence before writing it'''
    records = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in records:
        # revcomp() modifies the record in place; its return value is unused.
        record.revcomp()
        print(record, file=out_handle)

    utils.close(out_handle)
示例14: replace_bases
def replace_bases(infile, outfile, old, new):
    '''Copy infile to outfile, calling replace_bases(old, new) on every sequence first'''
    records = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in records:
        record.replace_bases(old, new)
        print(record, file=out_handle)

    utils.close(out_handle)
示例15: strip_illumina_suffix
def strip_illumina_suffix(infile, outfile):
    '''Copy infile to outfile, calling strip_illumina_suffix() on every sequence first'''
    records = sequences.file_reader(infile)
    out_handle = utils.open_file_write(outfile)

    for record in records:
        record.strip_illumina_suffix()
        print(record, file=out_handle)

    utils.close(out_handle)