本文整理汇总了Python中nesoni.io.read_sequences函数的典型用法代码示例。如果您正苦于以下问题:Python read_sequences函数的具体用法?Python read_sequences怎么用?Python read_sequences使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了read_sequences函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_snpeff
def build_snpeff(self):
jar = io.find_jar('snpEff.jar')
with open(self/'snpeff.config','wb') as f:
print >> f, 'data_dir = snpeff'
print >> f, 'genomes : ' + self.name
print >> f, self.name + '.genome : ' + self.name
snpwork = io.Workspace(self/'snpeff',must_exist=False)
snpwork_genome = io.Workspace(snpwork/self.name,must_exist=False)
snpwork_genomes = io.Workspace(snpwork/'genomes',must_exist=False)
annotations = self.annotations_filename()
assert annotations
with open(snpwork_genome/'genes.gff','wb') as f:
for record in annotation.read_annotations(annotations):
if record.end <= record.start: continue
if not record.attr:
record.attr['attributes'] = 'none'
print >> f, record.as_gff()
with open(snpwork_genomes/(self.name+'.fa'),'wb') as f:
for name, seq in io.read_sequences(self.reference_fasta_filename()):
io.write_fasta(f, name, seq)
io.execute('java -jar JAR build NAME -gff3 -c CONFIG',
JAR=jar, NAME=self.name, CONFIG=self/'snpeff.config')
示例2: run
def run(self):
extractions = [ ]
for item in self.genes.split(','):
extraction = item.split('/')
assert len(extraction) == 4
extractions.append(extraction)
rename = { }
if self.rename:
for item in self.rename.split(','):
old,new = item.split('=')
rename[old] = new
work = self.get_workspace()
with workspace.tempspace() as temp:
items = list(annotation.read_annotations(self.annotation))
for item in items:
item.seqid = rename.get(item.seqid, item.seqid)
annotation.write_gff3(temp/'temp.gff', get_genes(items, extractions, self.log))
del items
with open(temp/'temp.fa','wb') as f:
for name,seq in io.read_sequences(self.genome):
name = name.split()[0]
name = rename.get(name,name)
io.write_fasta(f, name, seq)
reference_directory.Make_tt_reference(
self.output_dir,
filenames = [ temp/'temp.fa', temp/'temp.gff' ] + self.extra,
index = self.index, shrimp = self.shrimp,
bowtie = self.bowtie, star = self.star
).run()
开发者ID:Victorian-Bioinformatics-Consortium,项目名称:tail-tools,代码行数:34,代码来源:reference_directory_ensembl.py
示例3: make_file_for_primer_3
def make_file_for_primer_3 (gff_file, ref_file, names_file, output_file):
# check for a tmp direcory
if len(glob.glob("./tmp")) == 0:
call (["mkdir", "tmp"])
gff_file = list(annotation.read_annotations(gff_file))
print "\n Reading in the reference file"
seq_dict = dict(io.read_sequences(ref_file))
names_file = open(names_file).readlines()
config = open("primer_config.txt").readlines()
with open("tmp/regions_" + output_file, 'w') as out_f:
for name in names_file:
sname = name.strip("\n")
found = False
for line in gff_file:
gff_name = line.attr.get ("Name", "No_name")
peak = line.attr.get ("id", "No_id")
if sname in gff_name.split("/"):
out_f.write ("SEQUENCE_ID="+ gff_name.replace("/", "_") + "_" + peak + "\n")
out_f.write("SEQUENCE_TEMPLATE=" + line.shifted(-100, 0).get_seq(seq_dict) + "\n")
found = True
for cline in config:
out_f.write(cline.strip("\n") + "\n")
out_f.write("=" + "\n")
if found ==False:
print "Could not find the gene " + sname + " in the gff file"
示例4: main
def main(args):
size, args = grace.get_option_value(args,'--size',int,200)
stride, args = grace.get_option_value(args,'--stride',int,50)
grace.expect_no_further_options(args)
if not args:
print USAGE
return 1
for filename in args:
for name, seq in io.read_sequences(filename):
name_parts = name.split(None, 1)
name = name_parts[0]
if len(name_parts) > 1:
desc = ' ' + name_parts[1]
else:
desc = ''
for i in xrange(-size+stride,len(seq),stride):
start = max(0,min(len(seq),i))
end = max(0,min(len(seq), i+size))
io.write_fasta(
sys.stdout,
'%s:%d..%d' % (name,start+1,end) + desc,
seq[start:end]
)
return 0
示例5: run
def run(self):
f = self.begin_output()
for filename in self.filenames:
any = False
name = os.path.splitext(os.path.split(filename)[1])[0]
try:
iterator = io.read_sequences(filename, qualities=True)
except grace.Error:
iterator = None
if iterator is not None:
total = 0
total_length = 0
for seq in io.read_sequences(filename, qualities=True):
total += 1
total_length += len(seq[1])
print >> f, grace.datum(name, 'sequences', total)
print >> f, grace.datum(name, 'average length', float(total_length)/total)
print >> f
any = True
try:
iterator = annotation.read_annotations(filename)
except grace.Error:
iterator = None
if iterator:
total = 0
counts = { }
for item in iterator:
total += 1
counts[item.type] = counts.get(item.type,0)+1
print >> f, grace.datum(name, 'features', total)
for key in sorted(counts):
print >> f, grace.datum(name, key + ' features', counts[key])
print >> f
any = True
if not any:
raise grace.Error(filename + ' is neither a sequence file nor an annotation file that nesoni can read.')
self.end_output(f)
示例6: run
def run(self):
base = os.path.split(self.prefix)[1]
annotations = [ ]
sequences = [ ]
for filename in self.filenames:
any = False
if io.is_sequence_file(filename):
sequences.append(filename)
any = True
if annotation.is_annotation_file(filename):
annotations.append(filename)
any = True
assert any, 'File is neither a recognized sequence or annotation file'
cytoband_filename = os.path.join(self.prefix,base+'_cytoband.txt')
property_filename = os.path.join(self.prefix,'property.txt')
gff_filename = os.path.join(self.prefix,base+'.gff')
output_filenames = [ cytoband_filename, property_filename, gff_filename ]
if not os.path.exists(self.prefix):
os.mkdir(self.prefix)
f = open(property_filename,'wb')
print >> f, 'ordered=true'
print >> f, 'id=%s' % base
print >> f, 'name=%s' % (self.name or base)
print >> f, 'cytobandFile=%s_cytoband.txt' % base
print >> f, 'geneFile=%s.gff' % base
print >> f, 'sequenceLocation=%s' % base
f.close()
trivia.As_gff(output=gff_filename,
filenames=annotations,
exclude=[ 'gene', 'source' ]
).run()
f_cyt = open(cytoband_filename,'wb')
for filename in sequences:
for name, seq in io.read_sequences(filename):
assert '/' not in name
f = open(os.path.join(self.prefix, name + '.txt'), 'wb')
f.write(seq)
f.close()
print >> f_cyt, '%s\t0\t%d' % (name, len(seq))
f_cyt.close()
genome_filename = self.prefix + '.genome'
if os.path.exists(genome_filename):
os.unlink(genome_filename)
io.execute(
['zip', '-j', io.abspath(genome_filename)] +
[ io.abspath(item) for item in output_filenames ]
)
for filename in output_filenames:
if os.path.exists(filename):
os.unlink(filename)
示例7: get_lengths
def get_lengths(self):
#Legacy working directory
if not self.object_exists('reference-lengths.pickle.gz'):
lengths = [ ]
for name, seq in io.read_sequences(self.reference_fasta_filename()):
name = name.split()[0]
lengths.append( (name, len(seq)) )
self.set_object(lengths, 'reference-lengths.pickle.gz')
return self.get_object('reference-lengths.pickle.gz')
示例8: convert
def convert(filename):
info = io.get_file_info(filename)
ok = selection.matches('type-fastq:[compression-none/compression-gzip/compression-bzip2]', info)
if ok:
return filename
result_name = tempname()
with open(result_name,'wb') as f:
for name, seq, qual in io.read_sequences(filename, qualities='required'):
io.write_fastq(f, name, seq, qual)
return result_name
示例9: run
def run(self):
f = self.begin_output()
for filename in self.filenames:
info = io.get_file_info(filename)
any = False
name = os.path.splitext(os.path.split(filename)[1])[0]
if info.matches('sequences'):
total = 0
total_length = 0
for seq in io.read_sequences(filename, qualities=True):
total += 1
total_length += len(seq[1])
print >> f, grace.datum(name, 'sequences', total)
print >> f, grace.datum(name, 'total bases', total_length)
if total:
print >> f, grace.datum(name, 'average length', float(total_length)/total)
print >> f
any = True
if info.matches('annotations'):
total = 0
counts = { }
for item in annotation.read_annotations(filename):
total += 1
counts[item.type] = counts.get(item.type,0)+1
print >> f, grace.datum(name, 'features', total)
for key in sorted(counts):
print >> f, grace.datum(name, key + ' features', counts[key])
print >> f
any = True
if info.matches('type-vcf'):
reader_f = io.open_possibly_compressed_file(filename)
reader = vcf.Reader(reader_f)
n = 0
for item in reader:
n += 1
print >> f, grace.datum(name, 'variants', n)
any = True
if not any:
raise grace.Error('Don\'t know what to do with ' + filename)
self.end_output(f)
示例10: iter_reads
def iter_reads(config, qualities=False):
if 'stride' not in config:
raise grace.Error('Please re-run nesoni shrimp, output format has changed')
stride = config['stride']
for reads_filename_set in config['reads']:
if config['solid']:
reader = [ io.read_solid(filename) for filename in reads_filename_set ]
else:
reader = [ io.read_sequences(filename, qualities) for filename in reads_filename_set ]
reader = itertools.izip(*reader)
for i, items in enumerate(reader):
if i % stride == 0:
for item in items:
yield item
示例11: run
def run(self):
workspace = self.get_workspace()
reference = reference_directory.Reference(self.reference, must_exist=True)
reader_f = io.open_possibly_compressed_file(self.vcf)
reader = vcf.Reader(reader_f)
variants = collections.defaultdict(list)
for record in reader:
variants[record.CHROM].append(record)
reader_f.close()
for chrom in variants:
variants[chrom].sort(key=lambda item: item.POS)
filenames = [ workspace/(item+'.fa') for item in reader.samples ]
for filename in filenames:
with open(filename,'wb'): pass
for name, seq in io.read_sequences(reference.reference_fasta_filename()):
for i, sample in enumerate(reader.samples):
revised = [ ]
pos = 0
for variant in variants[name]:
gt = variant.samples[i].data.GT
if gt is None: continue
assert gt.isdigit(), 'Unsupported genotype (can only use haploid genotypes): '+gt
gt_number = int(gt)
if gt_number == 0:
var_seq = variant.REF
else:
var_seq = str(variant.ALT[gt_number-1])
assert re.match('[ACGTN]*$', var_seq), 'Unsupported variant type: '+var_seq
new_pos = variant.POS-1
assert new_pos >= pos, 'Variants overlap.'
revised.append(seq[pos:new_pos])
pos = new_pos
revised.append(var_seq)
assert seq[pos:pos+len(variant.REF)].upper() == variant.REF, 'REF column in VCF does not match reference sequence'
pos += len(variant.REF)
revised.append(seq[pos:])
with open(filenames[i],'ab') as f:
io.write_fasta(f, name, ''.join(revised))
del variants[name]
assert not variants, 'Chromosome names in VCF not in reference: '+' '.join(variants)
示例12: run
def run(self):
f = self.begin_output()
for filename in self.filenames:
for name, seq in io.read_sequences(filename):
name_parts = name.split(None, 1)
name = name_parts[0]
for i in xrange(-self.size+self.stride,len(seq),self.stride):
start = max(0,min(len(seq),i))
end = max(0,min(len(seq), i+self.size))
shred_name = '%s:%d..%d' % (name,start+1,end)
shred_seq = seq
if self.quality:
io.write_fastq(f, shred_name, seq[start:end], chr(33+self.quality)*(end-start))
else:
io.write_fasta(f, shred_name, seq[start:end])
self.end_output(f)
示例13: debias
def debias(args):
import numpy
radius, args = grace.get_option_value(args, '--radius', int, 2)
dirs = args
for dir_name in dirs:
for name, seq in io.read_sequences(os.path.join(dir_name,'reference.fa')):
for suffix, ambig_suffix in [
('-depth', '-ambiguous-depth'),
('-pairspan-depth', '-ambiguous-pairspan-depth'),
]:
root = grace.filesystem_friendly_name(name)
full_name = os.path.join(dir_name, root + suffix + '.userplot')
full_ambig_name = os.path.join(dir_name, root + ambig_suffix + '.userplot')
if not os.path.exists(full_name): continue
if not os.path.exists(full_ambig_name): continue
output_suffix = '-%d.userplot' % radius
print dir_name, root, output_suffix
depths = numpy.array( read_unstranded_userplot(full_name) )
ambig_depths = numpy.array( read_unstranded_userplot(full_ambig_name) )
expect = expected_depth(root, seq, depths, ambig_depths, radius)
write_unstranded_userplot(
os.path.join(dir_name, root + suffix + '-expected' + output_suffix),
expect)
corrected = depths / expect * numpy.median(expect)
corrected[expect <= 5.0] = 0.0
write_unstranded_userplot(
os.path.join(dir_name, root + suffix + '-corrected' + output_suffix),
corrected)
ambig_corrected = ambig_depths / expect * numpy.median(expect)
ambig_corrected[expect <= 0.0] = 0.0
write_unstranded_userplot(
os.path.join(dir_name, root + ambig_suffix + '-corrected' + output_suffix),
ambig_corrected)
示例14: set_sequences
def set_sequences(self, filenames):
reference_genbank_filename = self / 'reference.gbk'
reference_filename = self / 'reference.fa'
reference_genbank_file = open(reference_genbank_filename,'wb')
any_genbank = [ False ]
def genbank_callback(name, record):
""" Make a copy of any genbank files passed in. """
from Bio import SeqIO
SeqIO.write([record], reference_genbank_file, 'genbank')
f = open(self / (grace.filesystem_friendly_name(name) + '.gbk'), 'wb')
SeqIO.write([record], f, 'genbank')
f.close()
any_genbank[0] = True
lengths = [ ]
seen = set()
f = open(reference_filename, 'wb')
for filename in filenames:
for name, seq in io.read_sequences(filename, genbank_callback=genbank_callback):
name = name.split()[0]
assert name not in seen, 'Duplicate chromosome name: ' + name
seen.add(name)
lengths.append( (name, len(seq)) )
io.write_fasta(f, name, seq)
f.close()
self.set_object(lengths, 'reference-lengths.pickle.gz')
reference_genbank_file.close()
if not any_genbank[0]:
os.unlink(reference_genbank_filename)
# Create an index of the reference sequences for samtools
io.execute([
'samtools', 'faidx', reference_filename
])
示例15: run
def run(self):
#mincov, args = grace.get_option_value(args, '--mincov', int, 1)
#maxdiff, args = grace.get_option_value(args, '--maxdiff', int, 16)
#minsize, args = grace.get_option_value(args, '--minsize', int, 200)
#what, args = grace.get_option_value(args, '--what', as_core_or_unique, 'core')
#is_core = (what == 'core')
#
#grace.expect_no_further_options(args)
#
#if len(args) < 2:
# print >> sys.stderr, HELP
# raise grace.Help_shown()
#
#output_dir, working_dirs = args[0], args[1:]
#
##assert not path.exists(path.join(output_dir, 'reference.fa')), \
#assert not path.exists(path.join(output_dir, 'parameters')), \
# 'Output directory not given'
#
#if not path.exists(output_dir):
# os.mkdir(output_dir)
assert self.what in ('core','unique'), 'Expected --what to be either "core" or "unique".'
is_core = (self.what == 'core')
workspace = self.get_workspace()
for name, seq in io.read_sequences(working_directory.Working(self.working_dirs[0]).get_reference().reference_fasta_filename()):
self.log.log(name + '\n')
friendly_name = grace.filesystem_friendly_name(name)
good = [ True ] * len(seq)
for working_dir in self.working_dirs:
if is_core:
suffix = '-depth.userplot'
else:
suffix = '-ambiguous-depth.userplot'
data = trivia.read_unstranded_userplot(
os.path.join(working_dir, friendly_name+suffix)
)
assert len(seq) == len(data)
for i in xrange(len(seq)):
if good[i]:
if is_core:
good[i] = data[i] >= self.mincov
else:
good[i] = data[i] < self.mincov
#Close holes
start = -self.maxdiff-1
n_holes = 0
for i in xrange(len(seq)):
if good[i]:
if 0 < i-start <= self.maxdiff:
for j in xrange(start,i): good[j] = True
n_holes += 1
start = i+1
self.log.log('Closed '+grace.pretty_number(n_holes)+' holes\n')
f = open( workspace/('%s-%s.fa' % (friendly_name,self.what)), 'wb')
io.write_fasta(f, name,
''.join([ (seq[i] if good[i] else 'N')
for i in xrange(len(seq)) ])
)
f.close()
f = open( workspace/('%s-%s_masked.fa' % (friendly_name,self.what)), 'wb')
io.write_fasta(f, name,
''.join([ (seq[i] if good[i] else seq[i].lower())
for i in xrange(len(seq)) ])
)
f.close()
f_good = open( workspace/('%s-%s_parts.fa' % (friendly_name,self.what)), 'wb')
f_nongood = open( workspace/('%s-non%s_parts.fa' % (friendly_name,self.what)), 'wb')
start = 0
n_good = [0]
n_good_bases = [0]
def emit(i):
if i-start < self.minsize: return
if good[start]:
n_good[0] += 1
n_good_bases[0] += i-start
io.write_fasta(
f_good if good[start] else f_nongood,
'%s:%d..%d' % (name, start+1,i),
seq[start:i]
)
for i in xrange(1,len(seq)):
if good[i] != good[start]:
emit(i)
start = i
emit(len(seq))
f_nongood.close()
f_good.close()
self.log.log(grace.pretty_number(sum(good))+' bases are '+self.what+', of '+grace.pretty_number(len(seq))+' in reference sequence\n')
self.log.log(grace.pretty_number(n_good[0])+' parts at least '+grace.pretty_number(self.minsize)+' bases long with '+grace.pretty_number(n_good_bases[0])+' total bases\n')
#.........这里部分代码省略.........