当前位置: 首页>>代码示例>>Python>>正文


Python io.read_sequences函数代码示例

本文整理汇总了Python中nesoni.io.read_sequences函数的典型用法代码示例。如果您正苦于以下问题:Python read_sequences函数的具体用法?Python read_sequences怎么用?Python read_sequences使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了read_sequences函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build_snpeff

 def build_snpeff(self):
     jar = io.find_jar('snpEff.jar')
     
     with open(self/'snpeff.config','wb') as f:
         print >> f, 'data_dir = snpeff'
         print >> f, 'genomes : ' + self.name
         print >> f, self.name + '.genome : ' + self.name 
     
     snpwork = io.Workspace(self/'snpeff',must_exist=False)
     snpwork_genome = io.Workspace(snpwork/self.name,must_exist=False)
     snpwork_genomes = io.Workspace(snpwork/'genomes',must_exist=False)
     
     annotations = self.annotations_filename()
     assert annotations
     with open(snpwork_genome/'genes.gff','wb') as f:
         for record in annotation.read_annotations(annotations):
             if record.end <= record.start: continue
             if not record.attr:
                 record.attr['attributes'] = 'none'
             print >> f, record.as_gff()
     
     with open(snpwork_genomes/(self.name+'.fa'),'wb') as f:
         for name, seq in io.read_sequences(self.reference_fasta_filename()):
             io.write_fasta(f, name, seq)
             
     io.execute('java -jar JAR build NAME -gff3 -c CONFIG',
         JAR=jar, NAME=self.name, CONFIG=self/'snpeff.config')
开发者ID:Victorian-Bioinformatics-Consortium,项目名称:nesoni,代码行数:27,代码来源:reference_directory.py

示例2: run

    def run(self):
        extractions = [ ]
        for item in self.genes.split(','):
            extraction = item.split('/')
            assert len(extraction) == 4
            extractions.append(extraction)
            
        rename = { }
        if self.rename:
            for item in self.rename.split(','):
                old,new = item.split('=')
                rename[old] = new

        work = self.get_workspace()        
        
        with workspace.tempspace() as temp:
            items = list(annotation.read_annotations(self.annotation))
            for item in items:
                item.seqid = rename.get(item.seqid, item.seqid)
            annotation.write_gff3(temp/'temp.gff', get_genes(items, extractions, self.log))
            del items
            
            with open(temp/'temp.fa','wb') as f:
                for name,seq in io.read_sequences(self.genome):
                    name = name.split()[0]
                    name = rename.get(name,name)
                    io.write_fasta(f, name, seq)
            
            reference_directory.Make_tt_reference(
                self.output_dir,
                filenames = [ temp/'temp.fa', temp/'temp.gff' ] + self.extra,
                index = self.index, shrimp = self.shrimp, 
                bowtie = self.bowtie, star = self.star
                ).run()
开发者ID:Victorian-Bioinformatics-Consortium,项目名称:tail-tools,代码行数:34,代码来源:reference_directory_ensembl.py

示例3: make_file_for_primer_3

def make_file_for_primer_3 (gff_file, ref_file, names_file, output_file):
    # check for a tmp direcory 
    if len(glob.glob("./tmp")) == 0:
        call (["mkdir", "tmp"])

    gff_file = list(annotation.read_annotations(gff_file))
    print "\n Reading in the reference file"
    seq_dict = dict(io.read_sequences(ref_file))
    names_file = open(names_file).readlines()
    config  = open("primer_config.txt").readlines()

    with open("tmp/regions_" + output_file, 'w') as out_f:
        for name in names_file:
            sname = name.strip("\n")
            found = False
            for line in gff_file:
                gff_name = line.attr.get ("Name", "No_name")
                peak = line.attr.get ("id", "No_id")
                if sname in gff_name.split("/"):
                    out_f.write ("SEQUENCE_ID="+ gff_name.replace("/", "_") + "_" + peak + "\n")
                    out_f.write("SEQUENCE_TEMPLATE=" + line.shifted(-100, 0).get_seq(seq_dict) + "\n")
                    found = True
                    for cline in config:
                        out_f.write(cline.strip("\n")  + "\n")
                    out_f.write("="  + "\n")
            if found ==False:
                print "Could not find the gene " + sname + " in the gff file"
开发者ID:AndrewPattison,项目名称:Python-scripts,代码行数:27,代码来源:get_primers.py

示例4: main

def main(args):
    size, args = grace.get_option_value(args,'--size',int,200)
    stride, args = grace.get_option_value(args,'--stride',int,50)
    grace.expect_no_further_options(args)
    
    if not args:
        print USAGE
        return 1
    
    for filename in args:
        for name, seq in io.read_sequences(filename):
            name_parts = name.split(None, 1)
            name = name_parts[0]
            if len(name_parts) > 1:
               desc = ' ' + name_parts[1]
            else:
               desc = ''
            
            for i in xrange(-size+stride,len(seq),stride):
                start = max(0,min(len(seq),i))
                end = max(0,min(len(seq), i+size))
                io.write_fasta(
                    sys.stdout,
                    '%s:%d..%d' % (name,start+1,end) + desc,
                    seq[start:end]
                )
    
    return 0
开发者ID:Slugger70,项目名称:nesoni,代码行数:28,代码来源:shred.py

示例5: run

    def run(self):
        f = self.begin_output()
    
        for filename in self.filenames:
            any = False
            
            name = os.path.splitext(os.path.split(filename)[1])[0]
            
            try:
               iterator = io.read_sequences(filename, qualities=True)
            except grace.Error:
               iterator = None
               
            if iterator is not None:
                total = 0
                total_length = 0
                for seq in io.read_sequences(filename, qualities=True):
                    total += 1
                    total_length += len(seq[1])
                print >> f, grace.datum(name, 'sequences', total)
                print >> f, grace.datum(name, 'average length', float(total_length)/total)
                print >> f
                any = True
            
            try:
                iterator = annotation.read_annotations(filename)
            except grace.Error:
                iterator = None
                
            if iterator:
                total = 0
                counts = { }
                for item in iterator:
                    total += 1
                    counts[item.type] = counts.get(item.type,0)+1
                                
                print >> f, grace.datum(name, 'features', total)
                for key in sorted(counts):
                    print >> f, grace.datum(name, key + ' features', counts[key])
                print >> f
                any = True
            
            if not any:
                raise grace.Error(filename + ' is neither a sequence file nor an annotation file that nesoni can read.')

        self.end_output(f)
开发者ID:Slugger70,项目名称:nesoni,代码行数:46,代码来源:trivia.py

示例6: run

    def run(self):
        base = os.path.split(self.prefix)[1]
        
        annotations = [ ]
        sequences = [ ]
        
        for filename in self.filenames:
            any = False
            if io.is_sequence_file(filename):
                sequences.append(filename)
                any = True
            if annotation.is_annotation_file(filename):
                annotations.append(filename)
                any = True
            assert any, 'File is neither a recognized sequence or annotation file'

        cytoband_filename = os.path.join(self.prefix,base+'_cytoband.txt')
        property_filename = os.path.join(self.prefix,'property.txt')
        gff_filename = os.path.join(self.prefix,base+'.gff')
        output_filenames = [ cytoband_filename, property_filename, gff_filename ] 

        if not os.path.exists(self.prefix):
            os.mkdir(self.prefix)
            
        f = open(property_filename,'wb')
        print >> f, 'ordered=true'
        print >> f, 'id=%s' % base
        print >> f, 'name=%s' % (self.name or base)
        print >> f, 'cytobandFile=%s_cytoband.txt' % base
        print >> f, 'geneFile=%s.gff' % base
        print >> f, 'sequenceLocation=%s' % base
        f.close()
        
        trivia.As_gff(output=gff_filename,
               filenames=annotations,
               exclude=[ 'gene', 'source' ]
        ).run()
        
        f_cyt = open(cytoband_filename,'wb')
        for filename in sequences:
            for name, seq in io.read_sequences(filename):
                assert '/' not in name
                f = open(os.path.join(self.prefix, name + '.txt'), 'wb')
                f.write(seq)
                f.close()
                print >> f_cyt, '%s\t0\t%d' % (name, len(seq))
        f_cyt.close()
        
        genome_filename = self.prefix + '.genome'
        if os.path.exists(genome_filename):
            os.unlink(genome_filename)
        io.execute(
            ['zip', '-j', io.abspath(genome_filename)] +
            [ io.abspath(item) for item in output_filenames ]
        )
        for filename in output_filenames:
            if os.path.exists(filename):
                os.unlink(filename)
开发者ID:Slugger70,项目名称:nesoni,代码行数:58,代码来源:igv.py

示例7: get_lengths

 def get_lengths(self):
     #Legacy working directory
     if not self.object_exists('reference-lengths.pickle.gz'):
         lengths = [ ]
         for name, seq in io.read_sequences(self.reference_fasta_filename()):
             name = name.split()[0]
             lengths.append( (name, len(seq)) )
         self.set_object(lengths, 'reference-lengths.pickle.gz')
             
     return self.get_object('reference-lengths.pickle.gz')
开发者ID:Slugger70,项目名称:nesoni,代码行数:10,代码来源:reference_directory.py

示例8: convert

 def convert(filename):
     info = io.get_file_info(filename)
     ok = selection.matches('type-fastq:[compression-none/compression-gzip/compression-bzip2]', info)
     if ok:
         return filename            
     result_name = tempname()
     with open(result_name,'wb') as f:
         for name, seq, qual in io.read_sequences(filename, qualities='required'):
             io.write_fastq(f, name, seq, qual)
     return result_name
开发者ID:Puneet-Shivanand,项目名称:nesoni,代码行数:10,代码来源:bowtie.py

示例9: run

    def run(self):
        f = self.begin_output()
    
        for filename in self.filenames:
            info = io.get_file_info(filename)
            
            any = False
            
            name = os.path.splitext(os.path.split(filename)[1])[0]
            
            if info.matches('sequences'):
                total = 0
                total_length = 0
                for seq in io.read_sequences(filename, qualities=True):
                    total += 1
                    total_length += len(seq[1])
                print >> f, grace.datum(name, 'sequences', total)
                print >> f, grace.datum(name, 'total bases', total_length)
                if total:
                    print >> f, grace.datum(name, 'average length', float(total_length)/total)
                print >> f
                any = True
            
            if info.matches('annotations'):
                total = 0
                counts = { }
                for item in annotation.read_annotations(filename):
                    total += 1
                    counts[item.type] = counts.get(item.type,0)+1
                                
                print >> f, grace.datum(name, 'features', total)
                for key in sorted(counts):
                    print >> f, grace.datum(name, key + ' features', counts[key])
                print >> f
                any = True
            
            if info.matches('type-vcf'):
                reader_f = io.open_possibly_compressed_file(filename)
                reader = vcf.Reader(reader_f)
                n = 0
                for item in reader:
                    n += 1
                print >> f, grace.datum(name, 'variants', n)
                any = True
            
            if not any:
                raise grace.Error('Don\'t know what to do with ' + filename)

        self.end_output(f)
开发者ID:drpowell,项目名称:nesoni,代码行数:49,代码来源:trivia.py

示例10: iter_reads

def iter_reads(config, qualities=False):
    if 'stride' not in config:
        raise grace.Error('Please re-run nesoni shrimp, output format has changed')

    stride = config['stride']
    for reads_filename_set in config['reads']:
        if config['solid']:
            reader = [ io.read_solid(filename) for filename in reads_filename_set ]
        else:
            reader = [ io.read_sequences(filename, qualities) for filename in reads_filename_set ]
        reader = itertools.izip(*reader)
        
        for i, items in enumerate(reader):
            if i % stride == 0: 
               for item in items: 
                   yield item
开发者ID:Puneet-Shivanand,项目名称:nesoni,代码行数:16,代码来源:shrimp.py

示例11: run

    def run(self):
        workspace = self.get_workspace()
        
        reference = reference_directory.Reference(self.reference, must_exist=True)
        
        reader_f = io.open_possibly_compressed_file(self.vcf)
        reader = vcf.Reader(reader_f)
        variants = collections.defaultdict(list)
        for record in reader:
            variants[record.CHROM].append(record)
        reader_f.close()
        
        for chrom in variants:
            variants[chrom].sort(key=lambda item: item.POS)
        
        filenames = [ workspace/(item+'.fa') for item in reader.samples ]
        for filename in filenames:
            with open(filename,'wb'): pass
        
        for name, seq in io.read_sequences(reference.reference_fasta_filename()):
            for i, sample in enumerate(reader.samples):            
                revised = [ ]
                pos = 0
                for variant in variants[name]:
                    gt = variant.samples[i].data.GT
                    if gt is None: continue
                    assert gt.isdigit(), 'Unsupported genotype (can only use haploid genotypes): '+gt
                    gt_number = int(gt)
                    if gt_number == 0:
                        var_seq = variant.REF
                    else:
                        var_seq = str(variant.ALT[gt_number-1])
                        assert re.match('[ACGTN]*$', var_seq), 'Unsupported variant type: '+var_seq
                    new_pos = variant.POS-1
                    assert new_pos >= pos, 'Variants overlap.'
                    revised.append(seq[pos:new_pos])
                    pos = new_pos
                    revised.append(var_seq)
                    assert seq[pos:pos+len(variant.REF)].upper() == variant.REF, 'REF column in VCF does not match reference sequence'
                    pos += len(variant.REF)
                revised.append(seq[pos:])
                            
                with open(filenames[i],'ab') as f:
                    io.write_fasta(f, name, ''.join(revised))

            del variants[name]        
        assert not variants, 'Chromosome names in VCF not in reference: '+' '.join(variants)
开发者ID:simonalpha,项目名称:nesoni,代码行数:47,代码来源:variant.py

示例12: run

    def run(self):    
        f = self.begin_output()
    
        for filename in self.filenames:
            for name, seq in io.read_sequences(filename):
                name_parts = name.split(None, 1)
                name = name_parts[0]
                for i in xrange(-self.size+self.stride,len(seq),self.stride):
                    start = max(0,min(len(seq),i))
                    end = max(0,min(len(seq), i+self.size))
                    shred_name = '%s:%d..%d' % (name,start+1,end)
                    shred_seq = seq
                    if self.quality:
                        io.write_fastq(f, shred_name, seq[start:end], chr(33+self.quality)*(end-start))
                    else:
                        io.write_fasta(f, shred_name, seq[start:end])

        self.end_output(f)
开发者ID:Puneet-Shivanand,项目名称:nesoni,代码行数:18,代码来源:shred.py

示例13: debias

def debias(args):
    import numpy

    radius, args = grace.get_option_value(args, '--radius', int, 2) 

    dirs = args
    
    for dir_name in dirs:
        for name, seq in io.read_sequences(os.path.join(dir_name,'reference.fa')):
            for suffix, ambig_suffix in [
                ('-depth', '-ambiguous-depth'),
                ('-pairspan-depth', '-ambiguous-pairspan-depth'),
            ]:
                root = grace.filesystem_friendly_name(name)
                full_name = os.path.join(dir_name, root + suffix + '.userplot')
                full_ambig_name = os.path.join(dir_name, root + ambig_suffix + '.userplot')
                if not os.path.exists(full_name): continue
                if not os.path.exists(full_ambig_name): continue
                
                output_suffix = '-%d.userplot' % radius 

                print dir_name, root, output_suffix
                
                depths = numpy.array( read_unstranded_userplot(full_name) )
                ambig_depths = numpy.array( read_unstranded_userplot(full_ambig_name) )
                expect = expected_depth(root, seq, depths, ambig_depths, radius)
                
                write_unstranded_userplot(
                    os.path.join(dir_name, root + suffix + '-expected' + output_suffix),
                    expect) 
                
                corrected = depths / expect * numpy.median(expect)
                corrected[expect <= 5.0] = 0.0
                write_unstranded_userplot(
                    os.path.join(dir_name, root + suffix + '-corrected' + output_suffix),
                    corrected)                 
                
                ambig_corrected = ambig_depths / expect * numpy.median(expect)
                ambig_corrected[expect <= 0.0] = 0.0
                write_unstranded_userplot(
                    os.path.join(dir_name, root + ambig_suffix + '-corrected' + output_suffix),
                    ambig_corrected)                 
开发者ID:drpowell,项目名称:nesoni,代码行数:42,代码来源:trivia.py

示例14: set_sequences

    def set_sequences(self, filenames):        
        reference_genbank_filename = self / 'reference.gbk'
        reference_filename = self / 'reference.fa'

        reference_genbank_file = open(reference_genbank_filename,'wb')
        any_genbank = [ False ]

        def genbank_callback(name, record):
            """ Make a copy of any genbank files passed in. """
            from Bio import SeqIO
            
            SeqIO.write([record], reference_genbank_file, 'genbank')
            
            f = open(self / (grace.filesystem_friendly_name(name) + '.gbk'), 'wb')
            SeqIO.write([record], f, 'genbank')
            f.close()
            
            any_genbank[0] = True
        
        lengths = [ ]
        seen = set()
        f = open(reference_filename, 'wb')
        for filename in filenames:
            for name, seq in io.read_sequences(filename, genbank_callback=genbank_callback):
                name = name.split()[0]
                assert name not in seen, 'Duplicate chromosome name: ' + name
                seen.add(name)
                lengths.append( (name, len(seq)) )
                io.write_fasta(f, name, seq)
        f.close()        
        self.set_object(lengths, 'reference-lengths.pickle.gz')
        
        reference_genbank_file.close()
        if not any_genbank[0]:
            os.unlink(reference_genbank_filename)
            
        # Create an index of the reference sequences for samtools
        io.execute([
            'samtools', 'faidx', reference_filename
        ])
开发者ID:Victorian-Bioinformatics-Consortium,项目名称:nesoni,代码行数:40,代码来源:reference_directory.py

示例15: run

    def run(self):
        #mincov, args = grace.get_option_value(args, '--mincov', int, 1) 
        #maxdiff, args = grace.get_option_value(args, '--maxdiff', int, 16) 
        #minsize, args = grace.get_option_value(args, '--minsize', int, 200)
        #what, args = grace.get_option_value(args, '--what', as_core_or_unique, 'core')    
        #is_core = (what == 'core') 
        #
        #grace.expect_no_further_options(args)
        #
        #if len(args) < 2:
        #    print >> sys.stderr, HELP
        #    raise grace.Help_shown()
        #
        #output_dir, working_dirs = args[0], args[1:]
        #
        ##assert not path.exists(path.join(output_dir, 'reference.fa')), \
        #assert not path.exists(path.join(output_dir, 'parameters')), \
        #        'Output directory not given'
        #
        #if not path.exists(output_dir):
        #    os.mkdir(output_dir)

        assert self.what in ('core','unique'), 'Expected --what to be either "core" or "unique".'
        is_core = (self.what == 'core') 
        
        workspace = self.get_workspace()
        
        for name, seq in io.read_sequences(working_directory.Working(self.working_dirs[0]).get_reference().reference_fasta_filename()):
            self.log.log(name + '\n')
            friendly_name = grace.filesystem_friendly_name(name)
            
            good = [ True ] * len(seq)
            
            for working_dir in self.working_dirs:
                if is_core:
                   suffix = '-depth.userplot'
                else:
                   suffix = '-ambiguous-depth.userplot'
                data = trivia.read_unstranded_userplot(
                    os.path.join(working_dir, friendly_name+suffix)
                )
                assert len(seq) == len(data)
                for i in xrange(len(seq)):
                   if good[i]:
                       if is_core:
                           good[i] = data[i] >= self.mincov
                       else:
                           good[i] = data[i] < self.mincov
    
            #Close holes
            start = -self.maxdiff-1
            n_holes = 0
            for i in xrange(len(seq)):
                if good[i]:
                     if 0 < i-start <= self.maxdiff:
                         for j in xrange(start,i): good[j] = True
                         n_holes += 1
                     start = i+1
            self.log.log('Closed '+grace.pretty_number(n_holes)+' holes\n')
            
            
            f = open( workspace/('%s-%s.fa' % (friendly_name,self.what)), 'wb')
            io.write_fasta(f, name,
                ''.join([ (seq[i] if good[i] else 'N')
                          for i in xrange(len(seq)) ])
            )
            f.close()
    
            f = open( workspace/('%s-%s_masked.fa' % (friendly_name,self.what)), 'wb')
            io.write_fasta(f, name,
                ''.join([ (seq[i] if good[i] else seq[i].lower())
                          for i in xrange(len(seq)) ])
            )
            f.close()
    
            f_good = open( workspace/('%s-%s_parts.fa' % (friendly_name,self.what)), 'wb')
            f_nongood = open( workspace/('%s-non%s_parts.fa' % (friendly_name,self.what)), 'wb')
            start = 0
            n_good = [0]
            n_good_bases = [0]    
            def emit(i):
                if i-start < self.minsize: return
                if good[start]:
                    n_good[0] += 1
                    n_good_bases[0] += i-start
                io.write_fasta(
                    f_good if good[start] else f_nongood,
                    '%s:%d..%d' % (name, start+1,i),
                    seq[start:i]
                )
            for i in xrange(1,len(seq)):
                if good[i] != good[start]:
                    emit(i)
                    start = i
            emit(len(seq))
            f_nongood.close()
            f_good.close()
            
            self.log.log(grace.pretty_number(sum(good))+' bases are '+self.what+', of '+grace.pretty_number(len(seq))+' in reference sequence\n')
            self.log.log(grace.pretty_number(n_good[0])+' parts at least '+grace.pretty_number(self.minsize)+' bases long with '+grace.pretty_number(n_good_bases[0])+' total bases\n')
#.........这里部分代码省略.........
开发者ID:Puneet-Shivanand,项目名称:nesoni,代码行数:101,代码来源:core.py


注:本文中的nesoni.io.read_sequences函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。