当前位置: 首页>>代码示例>>Python>>正文


Python Fasta.keys方法代码示例

本文整理汇总了Python中pyfasta.Fasta.keys方法的典型用法代码示例。如果您正苦于以下问题:Python Fasta.keys方法的具体用法?Python Fasta.keys怎么用?Python Fasta.keys使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyfasta.Fasta的用法示例。


在下文中一共展示了Fasta.keys方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: check_keyfn

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def check_keyfn(path, klass, inplace):
    f = Fasta(path, record_class=klass, flatten_inplace=inplace, key_fn=lambda key: key.split()[0])
    assert sorted(f.keys()) == ['a', 'b', 'c'], f.keys()
    fix(path)
    ff = Fasta(path, record_class=klass, flatten_inplace=inplace)
    assert sorted(ff.keys()) == ['a extra', 'b extra', 'c extra'], (ff.keys(), klass)
    fix(path)
开发者ID:jamescasbon,项目名称:pyfasta,代码行数:9,代码来源:test_all.py

示例2: check_keyfn2

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def check_keyfn2(path, klass, inplace):
    f = Fasta(path, record_class=klass, flatten_inplace=inplace, key_fn=lambda
            key: "-".join(key.split()))

    assert sorted(f.keys()) == ['a-extra', 'b-extra', 'c-extra'], f.keys()

    assert f['a-extra']
    fix(path)
开发者ID:brentp,项目名称:pyfasta,代码行数:10,代码来源:test_all.py

示例3: main

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def main():
    args = make_parser()
    if args.inplace:
        f = Fasta(args.fasta_file, flatten_inplace=True)
    else:
        f = Fasta(args.fasta_file)

    if args.output_file is not None:
        output = open(args.output_file, 'w')
    else:
        output_file_name = args.fasta_file.split('.')[0]
        output_file = '{0}.phylip'.format(output_file_name)
        output = open(output_file, 'w')

    sequence_count = len(f.keys())
    sequence_length = len(f[next(iter(f.keys()))])
    # print('', sequence_count, sequence_length, sep=' ')
    output.write(' {0} {1}\n'.format(sequence_count, sequence_length))

    for key in f.keys():
        subseq = []
        for chunk in grouper(f[key][:LINE_LENGTH], CHUNK_LENGTH):
            subseq.append(''.join(item[0] for item in chunk))
        subseq = ' '.join(subseq)
        if len(key) < CHUNK_LENGTH:
            key = key.ljust(CHUNK_LENGTH)
        else:
            key = key[:CHUNK_LENGTH]
        # print(key, ' ', subseq)
        output.write('{0} {1}\n'.format(key, subseq))

    sequence_length -= LINE_LENGTH
    start = LINE_LENGTH
    stop = LINE_LENGTH * 2
    # print()
    output.write('\n')

    while sequence_length > 0:
        for key in f.keys():
            subseq = []
            for chunk in grouper(f[key][start:stop], CHUNK_LENGTH, ' '):
                subseq.append(''.join(item[0] for item in chunk))
            subseq = ' '.join(subseq)
            # print(PAD_STRING, ' ', subseq)
            output.write('{0} {1}\n'.format(PAD_STRING, subseq))
        sequence_length -= LINE_LENGTH
        start += LINE_LENGTH
        stop += LINE_LENGTH
        # print()
        output.write('\n')

    output.close()
开发者ID:davidmnoriega,项目名称:fast2phy,代码行数:54,代码来源:__init__.py

示例4: run

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
    def run(self, filename):
        self.openOutFiles(filename)
        f = Fasta(filename)

        count = len(f)
        self.not_found_in_kabat, self.fr4_not_found, current = (0, 0, 0)

        for name in f.keys():
            current += 1
            if current % 1000 == 0:
                print "All %d. Current: %d" % (count, current)
                # format: vName_jName{frameNumber} or vName_dName{frameNumber}_jName{frameNumber}

            vGeneName = name.split("_")[0]

            vGeneRegions = self.getVGeneRegions(vGeneName)
            if vGeneRegions is None:
                continue

            withoutMarkup = f[name][vGeneRegions[self.kabat.regions_count * 2 - 1]:]
            group = self.findFR4(name, withoutMarkup)
            if group is None:
                continue

            self.result_kabat_file.write(name)
            self.result_kabat_file.write(("\t%d" * 10) % tuple(vGeneRegions))
            self.result_kabat_file.write(("\t%d" * 4 + "\n") % tuple(
                [vGeneRegions[9] + i for i in [1, group.start(), group.start() + 1, len(withoutMarkup)]]))

        self.closeOutFiles()
        print "all: {}; not in kabat: {}; without fr4: {}".format(current, self.not_found_in_kabat, self.fr4_not_found)
开发者ID:biocad,项目名称:au-summer-2013,代码行数:33,代码来源:markup.py

示例5: aa_seq

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def aa_seq(options):
    """ Gets the ancestral sequence from a Fasta file

    """
    f = Fasta(options.ancestralfasta)
    keyz = (f.keys())
    match = ''
    if (options.single_chromosome):
        # Single chromosome fasta should only have one sequence.
        # that sequence should be the sequence of interest.
        keyz = list(keyz)
        key = keyz[0]
    else:
        get_chromosome_from_header = options.header
        get_chromosome_from_header = \
            get_chromosome_from_header.replace('?', options.chromosome)
        for key in keyz:
            if(re.match(get_chromosome_from_header, key) is not None):
                match = key
        if(match is ''):
            raise Exception("No match possible is something wrong with the"
                            " regex specified to the program as"
                            "--header-regex")
    aaSeq = f[key]
    return(aaSeq)
开发者ID:MMesbahU,项目名称:selectionTools,代码行数:27,代码来源:aa_annotate.py

示例6: _no_empty

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
    def _no_empty(self, lista, listb):
        ''' removes empty entries '''
        
        # check for empty fasta.
        tmpa = list()
        tmpb = list()
        for i in range(len(listb)):
            
            # open it.
            try:
                z = Fasta(listb[i], record_class=MemoryRecord)
            
                # check for empty.
                if len(z.keys()) == 0:
                    continue

                # add to temp.
                tmpa.append(lista[i])
                tmpb.append(listb[i])

            except:
                logging.warning("bad fasta file")
            
        # sort back.
        return tmpa, tmpb
开发者ID:jim-bo,项目名称:parabio,代码行数:27,代码来源:nucmer.py

示例7: create_fasta_flat_file

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def create_fasta_flat_file(file):
    """Reads a fasta file for fast sequence retrival"""

    fasta_file = Fasta(file, key_fn=lambda key: key.split()[0])

    fasta_headers = set(fasta_file.keys());

    return fasta_file, fasta_headers
开发者ID:henrikstranneheim,项目名称:VariantUtilities,代码行数:10,代码来源:splicer.py

示例8: genome_contenct_stats

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def genome_contenct_stats(fasta_path):
    f = Fasta(fasta_path)
    g_box_total = []
    for seqid in f.keys():
        seq = f[seqid][:]
        g_boxs = len(re.findall("CACGTG", seq, flags=re.IGNORECASE))
        g_box_total.append(g_boxs)
    print >> sys.stderr, "total gboxes:{0}".format(sum(g_box_total))
开发者ID:gturco,项目名称:random_bio_tools,代码行数:10,代码来源:genome_stats.py

示例9: split

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def split(args):
    parser = optparse.OptionParser("""\
   split a fasta file into separated files.
        pyfasta split -n 6 [-k 5000 ] some.fasta
    the output will be some.1.fasta, some.2.fasta ... some.6.fasta
    the sizes will be as even as reasonable.
   """)
    parser.add_option("--header", dest="header", metavar="FILENAME_FMT",
       help="""this overrides all other options. if specified, it will
               split the file into a separate file for each header. it
               will be a template specifying the file name for each new file.
               e.g.:    "%(fasta)s.%(seqid)s.fasta"
               where 'fasta' is the basename of the input fasta file and seqid
               is the header of each entry in the fasta file.""" ,default=None)

    parser.add_option("-n", "--n", type="int", dest="nsplits", 
                            help="number of new files to create")
    parser.add_option("-o", "--overlap", type="int", dest="overlap", 
                            help="overlap in basepairs", default=0)
    parser.add_option("-k", "--kmers", type="int", dest="kmers", default=-1,
                     help="""\
    split big files into pieces of this size in basepairs. default
    default of -1 means do not split the sequence up into k-mers, just
    split based on the headers. a reasonable value would be 10Kbp""")
    options, fasta = parser.parse_args(args)
    if not (fasta and (options.nsplits or options.header)):
        sys.exit(parser.print_help())

    if isinstance(fasta, (tuple, list)):
        assert len(fasta) == 1, fasta
        fasta = fasta[0]

    kmer = options.kmers if options.kmers != -1 else None
    overlap = options.overlap if options.overlap != 0 else None
    f = Fasta(fasta)
    if options.header:
        names = dict([(seqid, options.header % \
                      dict(fasta=f.fasta_name, seqid=seqid)) \
                                       for seqid in f.keys()])
        """
        if len(names) > 0:
            assert names[0][1] != names[1][1], ("problem with header format", options.header)
        fhs = dict([(seqid, open(fn, 'wb')) for seqid, fn in names[:200]])
        fhs.extend([(seqid, StringIO(), fn) for seqid, fn in names[200:]])
        """
        return with_header_names(f, names)
    else:
        names = newnames(fasta, options.nsplits, kmers=kmer, overlap=overlap, 
                     header=options.header)

        #fhs = [open(n, 'wb') for n in names]
    if options.kmers == -1:
        return without_kmers(f, names)
    else: 
        return with_kmers(f, names, options.kmers, options.overlap)
开发者ID:brettjurgens,项目名称:466-project,代码行数:57,代码来源:split_fasta.py

示例10: mask_to_bed

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def mask_to_bed(fasta_file, mask_bed_name):
    "creates a bed file of the start and stops of masked seqs"
    mask_bed = open(mask_bed_name,"wb")
    f= Fasta(fasta_file)
    mask_id = 1
    for seqid in f.keys():
        seq = f[seqid][:]
        for m in re.finditer("X+",seq):
            mask_id = mask_id + 1
            w = '{0}\t{1}\t{2}\t{3}\t{4}\t+\t.\t.\t.\t1\t{5}\t0\n'.format(seqid,m.start(),m.end(),"mask_id {0}".format(mask_id),(m.end()-m.start()),(m.end()-m.start()+1))
            mask_bed.write(w)
开发者ID:gturco,项目名称:random_bio_tools,代码行数:13,代码来源:cns_to_bed.py

示例11: process_query

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def process_query():
    print('Reading sequence library and query sequence')
    library = Fasta(library_path)
    queries = Fasta(query_path)
    query_sequence = str(queries["Rattus"])

    print('Processing')
    progress = progressbar.ProgressBar(max_value=len(library.keys()))
    cpu_count = multiprocessing.cpu_count()
    executor = ThreadPoolExecutor(max_workers=cpu_count)

    tasks = []
    for record in list(library.keys())[:library_process_limit]:
        library_sequence = str(library[record])
        future = executor.submit(align, library_sequence, query_sequence)
        tasks.append(AlignmentTask(record, future))

    results = []
    for i in range(len(tasks)):
        _, _, score = tasks[i].future.result()
        results.append(AlignmentResult(title=tasks[i].record, score=score))
        progress.update(i)

    etalone_score = sum([ smatrix[(x, x)] for x in query_sequence ])

    print("Done")
    print("Etalone score is %d" % etalone_score)
    print("Got %d results, here are top-30 among them:" % len(results))
    print("Score  | Match   | Record")

    for sequence in sorted(results, key=lambda x: x.score, reverse=True)[:30]:
        match = (sequence.score / etalone_score) * 100.0
        print("%6d | %5.3f%% | %s" % (sequence.score, match, sequence.title))

    timer = get_performance_timer()
    for time in [timer.dotplot, timer.regions, timer.align]:
        print(time / cpu_count)
开发者ID:maxmalysh,项目名称:fasta,代码行数:39,代码来源:main.py

示例12: Sequence

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
class Sequence():
    """docstring for Sequence"""
    def __init__(self, engine='mysql', function = 'iterator', **kwargs):
        self.engine = engine
        if self.engine == 'mysql' and function == 'iterator':
            self.create_mysql_iterator(**kwargs)
        elif self.engine == 'biopython' and kwargs['data_type'] == 'fasta':
            self.create_biopython_iterator(**kwargs)
        elif self.engine == 'pyfasta' and kwargs['data_type'] == 'fasta':
            self.create_pyfasta_iterator(**kwargs)
        elif self.engine == 'twobit' and kwargs['data_type'] == 'twobit':
            self.create_twobit_iterator(**kwargs)

    def create_mysql_iterator(self, **kwargs):
        cur = kwargs['cursor']
        query = '''SELECT id, record FROM sequence WHERE n_count <= 2 AND 
                    trimmed_len > 40'''
        cur.execute(query)
        self.readcount = cur.rowcount
        self.read = iter(cur.fetchall())

    def create_biopython_iterator(self, **kwargs):
        from Bio import SeqIO
        print "Generating BioPython sequence index.  This may take a moment...."
        self.fasta = SeqIO.index(kwargs['input'], kwargs['data_type'])
        self.readcount = len(self.fasta)
        self.db_values = zip(range(len(self.fasta)), sorted(self.fasta.keys()))
        self.read = iter(self.db_values)

    def create_twobit_iterator(self, **kwargs):
        import bx.seq.twobit
        self.fasta = bx.seq.twobit.TwoBitFile(file(kwargs['input']))
        self.readcount = self.fasta.seq_count
        self.db_values = zip(range(self.fasta.seq_count), sorted(self.fasta.keys()))
        self.read = iter(self.db_values)

    def create_pyfasta_iterator(self, **kwargs):
        from pyfasta import Fasta
        print "Generating PyFasta sequence index.  This may take a moment...."
        self.fasta = Fasta(kwargs['input'])
        self.readcount = len(self.fasta)
        self.db_values = zip(range(len(self.fasta)), sorted(self.fasta.keys()))
        self.read = iter(self.db_values)

    def get_pyfasta_reads(self, **kwargs):
        from pyfasta import Fasta
        self.fasta = Fasta(kwargs['input'])
        self.readcount = len(self.fasta)
开发者ID:faircloth-lab,项目名称:msatcommander-gs,代码行数:50,代码来源:design_primers.py

示例13: generate_corpusfile

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def generate_corpusfile(fasta_fname, n, corpus_fname):
    '''
    Args:
        fasta_fname: corpus file name
        n: the number of chunks to split. In other words, "n" for "n-gram"
        corpus_fname: corpus_fnameput corpus file path
    Description:
        Protvec uses word2vec inside, and it requires to load corpus file
        to generate corpus.
    '''
    f = open(corpus_fname, "w")
    fasta = Fasta(fasta_fname)
    for record_id in tqdm(fasta.keys(), desc='corpus generation progress'):
        r = fasta[record_id]
        seq = str(r)
        ngram_patterns = split_ngrams(seq, n)
        for ngram_pattern in ngram_patterns:
            f.write(" ".join(ngram_pattern) + "\n")
    f.close()
开发者ID:kyu999,项目名称:biovec,代码行数:21,代码来源:models.py

示例14: read_fasta

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
def read_fasta(ref_files, fasta_header):
    """Read fasta file

    New line character can only exist between header and sequence,
    not inside sequence

    Args:
        file_path (str): Path to fasta file.

    Returns:
        fasta_dict (dict): Dictionary with fasta headers as keys and the
            sequences as values.
    """
    # Open fasta file and store headers and sequences
    for fasta_path in ref_files:
        # print(fasta_path)
        fasta = Fasta(fasta_path)
        if fasta_header in fasta.keys():
            return fasta
开发者ID:emilhaegglund,项目名称:master-thesis,代码行数:21,代码来源:simulate.py

示例15: split_seqs

# 需要导入模块: from pyfasta import Fasta [as 别名]
# 或者: from pyfasta.Fasta import keys [as 别名]
    def split_seqs(self, num_jobs, max_ref=5, max_qry=20):
        ''' splits reference and query into appropriate number of splits '''
        
        # load data into memory.
        r = Fasta(self.ref_fasta, record_class=MemoryRecord)
        q = Fasta(self.qry_fasta, record_class=MemoryRecord)
        
        ## reference ##
        # split according to criteria.
        if len(r) < max_ref:
            max_ref = len(r)
            
        if max_ref > num_jobs:
            max_ref = 1
        
        if len(q) < max_qry:
            max_qry = len(q)

        if num_jobs < max_qry:
            max_qry = num_jobs

        if (max_ref * max_qry) > num_jobs:
            max_qry = int(float(num_jobs) / float(max_ref))
        
        # count number of seqs.
        sc = len(r.keys())
        
        # create split info.
        self.ref_names = ["ref_%i" % x for x in range(max_ref)]
        self.ref_files = ["%s/%s.fasta" % (self.out_dir, x) for x in self.ref_names]
        
        # split according to rules.
        pyfasta.split_fasta.without_kmers(r, self.ref_files)
        self.ref_names, self.ref_files = self._no_empty(self.ref_names, self.ref_files)
        
        ## query ##
        # create split info.
        self.qry_names = ["qry_%i" % x for x in range(max_qry)]
        self.qry_files = ["%s/%s.fasta" % (self.out_dir, x) for x in self.qry_names]
        
        # split according to rules.
        pyfasta.split_fasta.without_kmers(q, self.qry_files)
        self.qry_names, self.qry_files = self._no_empty(self.qry_names, self.qry_files)
开发者ID:jim-bo,项目名称:parabio,代码行数:45,代码来源:nucmer.py


注:本文中的pyfasta.Fasta.keys方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。