当前位置: 首页>>代码示例>>Python>>正文


Python SeqIO.index方法代码示例

本文整理汇总了 Python 中 Bio.SeqIO.index 方法的典型用法代码示例。如果您正苦于以下问题：Python SeqIO.index 方法的具体用法是什么？Python SeqIO.index 怎么用？有哪些 Python SeqIO.index 的使用例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Bio.SeqIO 的用法示例。


在下文中一共展示了 SeqIO.index 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。注意：部分示例只是使用了名为 index 的变量或属性（例如 pandas 的 .index），并未直接调用 SeqIO.index；直接调用该方法的是示例 3 和示例 9。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: getBreakPoint

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def getBreakPoint(tupList, idx, direction='reverse', gap=2000):
    """Find the closest break point to position ``idx`` in ``tupList``.

    Every entry of ``tupList`` is unpacked as ``(start, end, diff)``. Starting
    at ``idx`` the list is walked one entry at a time -- towards index 0 when
    ``direction`` is ``'reverse'``, towards the end of the list otherwise --
    until an entry with ``diff >= gap`` is found.

    Args:
        tupList: Sequence of ``(start, end, diff)`` tuples of coordinates.
        idx: Index at which the search starts.
        direction: ``'reverse'`` walks towards the front of the list; any
            other value walks towards the back.
        gap: Minimum ``diff`` an entry needs in order to qualify.

    Returns:
        ``end + int(round(diff / 2))`` for the first qualifying entry, or
        ``False`` when the walk leaves the list (on either side) without
        finding one.
    """
    step = -1 if direction == 'reverse' else 1
    # Bound the walk on both sides: a negative index would otherwise wrap
    # around to the end of the list and report a break point that lies on
    # the wrong side of the starting position.
    while 0 <= idx < len(tupList):
        _start, end, diff = tupList[idx]
        if diff >= gap:
            # Return immediately so a legitimate break point of 0 is never
            # mistaken for "nothing found yet".
            return end + int(round(diff / 2))
        idx += step
    return False
开发者ID:nextgenusfs,项目名称:funannotate,代码行数:20,代码来源:funannotate-runEVM.py

示例2: _write_strain_functional_genes

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def _write_strain_functional_genes(self, strain_id, ref_functional_genes, orth_matrix, force_rerun=False):
    """Write (or reload) the JSON file flagging which reference genes are functional in a strain.

    A gene is flagged ``False`` when its row in ``orth_matrix`` is null in the
    ``strain_id`` column, and ``True`` otherwise.

    Returns:
        Tuple ``(strain_id, gene_to_func)`` where ``gene_to_func`` maps gene ID
        to a boolean.
    """
    json_path = op.join(self.model_dir, '{}_funcgenes.json'.format(strain_id))

    if not ssbio.utils.force_rerun(flag=force_rerun, outfile=json_path):
        # No rerun requested by the helper: reuse the mapping already on disk.
        with open(json_path, 'r') as handle:
            return strain_id, json.load(handle)

    # Every reference gene starts out functional ...
    functional_flags = dict.fromkeys(ref_functional_genes, True)

    # ... then the genes without an ortholog in this strain are switched off.
    rows_without_ortholog = orth_matrix[pd.isnull(orth_matrix[strain_id])]
    ids_without_ortholog = rows_without_ortholog[strain_id].index.tolist()
    for gene in set(ids_without_ortholog).intersection(set(ref_functional_genes)):
        functional_flags[gene] = False

    with open(json_path, 'w') as handle:
        json.dump(functional_flags, handle)

    return strain_id, functional_flags
开发者ID:SBRG,项目名称:ssbio,代码行数:25,代码来源:atlas2.py

示例3: _load_sequences_to_reference_gene

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def _load_sequences_to_reference_gene(self, g_id, force_rerun=False):
    """Load orthologous strain sequences into the reference Protein object and save it as a new pickle.

    For every strain in ``self.strain_infodict`` where ``g_id`` is marked
    functional, the strain's gene ID is looked up in the orthology matrix and
    the matching record is pulled from the strain's genome FASTA file.

    Args:
        g_id: Reference gene ID.
        force_rerun: Passed to ``ssbio.utils.force_rerun`` together with the
            output path to decide whether the pickle is rebuilt.

    Returns:
        Tuple ``(g_id, protein_seqs_pickle_path)``.
    """
    protein_seqs_pickle_path = op.join(self.sequences_by_gene_dir, '{}_protein_withseqs.pckl'.format(g_id))

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=protein_seqs_pickle_path):
        protein_pickle_path = self.gene_protein_pickles[g_id]
        protein_pickle = ssbio.io.load_pickle(protein_pickle_path)

        for strain, info in self.strain_infodict.items():
            if not info['functional_genes'][g_id]:
                continue

            new_id = '{}_{}'.format(g_id, strain)
            if protein_pickle.sequences.has_id(new_id):
                continue

            # Pull the gene ID of the strain from the orthology matrix
            strain_gene_key = self.df_orthology_matrix.at[g_id, strain]

            # Index the genome only once a sequence is actually needed, and
            # always close the index again: SeqIO.index keeps the FASTA file
            # open, so one leaked handle per strain adds up quickly.
            strain_sequences = SeqIO.index(info['genome_path'], 'fasta')
            try:
                protein_pickle.load_manual_sequence(seq=strain_sequences[strain_gene_key],
                                                    ident=new_id,
                                                    set_as_representative=False)
            finally:
                strain_sequences.close()

        protein_pickle.save_pickle(outfile=protein_seqs_pickle_path)

    return g_id, protein_seqs_pickle_path
开发者ID:SBRG,项目名称:ssbio,代码行数:25,代码来源:atlas2.py

示例4: split_blast_inputs_by_core

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def split_blast_inputs_by_core(blast_inputs, threads, blast_files_dir):
    """Distribute cluster identifiers over ``threads`` groups of similar size.

    The size of a cluster is the number of lines in
    ``<blast_files_dir>/<cluster>_ids.txt``. Clusters are assigned greedily:
    each one goes to the group chosen after the previous assignment, which is
    the first group with the smallest running total.

    Args:
        blast_inputs: Iterable of cluster identifiers.
        threads: Number of groups to create.
        blast_files_dir: Directory holding the ``*_ids.txt`` files.

    Returns:
        List of non-empty lists of cluster identifiers, one per used group.
    """
    groups = [[] for _ in range(threads)]
    loads = [0] * threads
    target = 0
    for cluster in blast_inputs:
        ids_path = os.path.join(blast_files_dir, '{0}_ids.txt'.format(cluster))
        with open(ids_path, 'r') as handle:
            total = sum(1 for _ in handle)
        groups[target].append(cluster)
        loads[target] += total
        # The next cluster goes to whichever group is lightest right now.
        target = loads.index(min(loads))

    return [group for group in groups if group]
开发者ID:B-UMMI,项目名称:chewBBACA,代码行数:21,代码来源:determine_paralogs.py

示例5: reverse_bed

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def reverse_bed():
    """Command-line entry point: map bed intervals onto the reverse strand.

    Reads a three-column bed file (chrom, start, stop) and a reference fasta
    from the command line, and writes a tab-separated file where each interval
    is expressed relative to the end of its chromosome and the chromosome name
    carries an ``_rc`` suffix.
    """
    parser = argparse.ArgumentParser(
        prog='reverse_bed',
        description='Convert bed-file coordinates to coordinates on the reverse strand.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    positional = (
        ('bed_in', 'Input bed file.'),
        ('ref_fasta', 'Input reference fasta file.'),
        ('bed_out', 'Output bed file.'),
    )
    for arg_name, arg_help in positional:
        parser.add_argument(arg_name, help=arg_help)
    args = parser.parse_args()

    # Chromosome name -> chromosome length, taken from the reference.
    reference = pysam.FastaFile(args.ref_fasta)
    lengths = dict(zip(reference.references, reference.lengths))

    bed = pd.read_csv(args.bed_in, sep='\t', names=['chrom', 'start', 'stop'])
    bed['chrom_length'] = bed['chrom'].map(lambda name: lengths[name])
    # Start and stop swap roles once measured from the other end.
    bed['rc_stop'] = bed['chrom_length'] - bed['start']
    bed['rc_start'] = bed['chrom_length'] - bed['stop']
    bed['chrom_rc'] = bed['chrom'] + '_rc'

    output_columns = ['chrom_rc', 'rc_start', 'rc_stop']
    bed[output_columns].to_csv(args.bed_out, index=False, header=False, sep='\t')
开发者ID:nanoporetech,项目名称:pomoxis,代码行数:23,代码来源:util.py

示例6: __getitem__

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def __getitem__(self, index: int):
    """Return the example at position ``index`` as a dict.

    The record is always read from ``self._cache``. The result has the keys
    ``'id'``, ``'primary'`` (the sequence as ``str``) and ``'protein_length'``.

    Raises:
        IndexError: If ``index`` is outside ``[0, self._num_examples)``.
    """
    in_range = 0 <= index < self._num_examples
    if not in_range:
        raise IndexError(index)

    record = self._cache[index]
    return {
        'id': record.id,
        'primary': str(record.seq),
        'protein_length': len(record.seq),
    }
开发者ID:songlab-cal,项目名称:tape,代码行数:18,代码来源:datasets.py

示例7: compress_and_send_mail

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def compress_and_send_mail(job_name, fasta_file, index_file, lock_file, mailer):
    """
    Compress the fasta file, remove the lock file, then mail the client that the fasta is ready

    :param job_name: job id
    :type job_name: str
    :param fasta_file: path of the fasta file to compress
    :type fasta_file: str
    :param index_file: path of the index file the sample name is read from
    :type index_file: str
    :param lock_file: path of the lock file, deleted after compression
    :type lock_file: str
    :param mailer: mailer object (to send mail)
    :type mailer: Mailer
    """
    Functions.compress(fasta_file)
    os.remove(lock_file)
    # Only the sample name is needed from the index file.
    _index, sample_name = Functions.read_index(index_file)
    Functions.send_fasta_ready(mailer, job_name, sample_name, True)
开发者ID:genotoul-bioinfo,项目名称:dgenies,代码行数:21,代码来源:functions.py

示例8: build_list_no_assoc

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def build_list_no_assoc(self, to):
    """
    Build list of queries that match with None target, or the opposite

    The contig names come from the query (or target) index; every name that
    shows up in the relevant column of the PAF file is dropped. The remaining
    names are returned in index order, each one once, so the output is the
    same on every run (iterating a set of strings is not).

    :param to: query or target
    :return: content of the file (one contig name per line, newline-terminated)
    """
    index = self.idx_q if to == "query" else self.idx_t
    # PAF column holding the contig name: 0 for the query, 5 for the target.
    name_column = 0 if to == "query" else 5
    _name, contigs_list, _contigs, _reversed, _abs_start, _c_len = Index.load(index)
    contigs_list = list(contigs_list)

    unmatched = set(contigs_list)
    with open(self.paf, "r") as paf:
        for line in paf:
            unmatched.discard(line.strip("\n").split("\t")[name_column])

    ordered = []
    for c_name in contigs_list:
        if c_name in unmatched:
            # Removing the name here makes duplicates in the index appear once.
            unmatched.remove(c_name)
            ordered.append(c_name)
    return "\n".join(ordered) + "\n"
开发者ID:genotoul-bioinfo,项目名称:dgenies,代码行数:18,代码来源:paf.py

示例9: _load_strain_sequences

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def _load_strain_sequences(self, strain_gempro):
    """Load strain sequences into the base model for comparisons, and into the strain-specific model itself.

    Sequences are taken straight from the orthology matrix when
    ``self._orthology_matrix_has_sequences`` is set. Otherwise the matrix is
    treated as holding gene IDs, and the sequences are fetched from the
    strain's genome FASTA file through ``SeqIO.index``.

    Args:
        strain_gempro: Strain model; its ``id``, ``genome_path`` and ``genes``
            are read here.
    """
    genome_index = None
    if self._orthology_matrix_has_sequences:  # Load directly from the orthology matrix if it contains sequences
        strain_sequences = self.df_orthology_matrix[strain_gempro.id].to_dict()
    else:  # Otherwise load from the genome file if the orthology matrix contains gene IDs
        log.debug('{}: loading strain genome CDS file'.format(strain_gempro.genome_path))
        genome_index = SeqIO.index(strain_gempro.genome_path, 'fasta')
        strain_sequences = genome_index

    try:
        for strain_gene in strain_gempro.genes:
            if not strain_gene.functional:
                continue

            if self._orthology_matrix_has_sequences:
                strain_gene_key = strain_gene.id
            else:
                # Pull the gene ID of the strain from the orthology matrix
                strain_gene_key = self.df_orthology_matrix.loc[strain_gene.id, strain_gempro.id]
                log.debug('{}: original gene ID to be pulled from strain fasta file'.format(strain_gene_key))

            # Load into the base strain for comparisons
            ref_gene = self.reference_gempro.genes.get_by_id(strain_gene.id)
            new_id = '{}_{}'.format(strain_gene.id, strain_gempro.id)
            if ref_gene.protein.sequences.has_id(new_id):
                log.debug('{}: sequence already loaded into reference model'.format(new_id))
                continue
            ref_gene.protein.load_manual_sequence(seq=strain_sequences[strain_gene_key], ident=new_id,
                                                  set_as_representative=False)
            log.debug('{}: loaded sequence into reference model'.format(new_id))

            # Load into the strain GEM-PRO
            strain_gene.protein.load_manual_sequence(seq=strain_sequences[strain_gene_key], ident=new_id,
                                                     set_as_representative=True)
            log.debug('{}: loaded sequence into strain model'.format(new_id))
    finally:
        # SeqIO.index keeps the FASTA file open; release it even when a
        # lookup or load fails part-way through.
        if genome_index is not None:
            genome_index.close()
开发者ID:SBRG,项目名称:ssbio,代码行数:37,代码来源:atlas.py

示例10: build_strain_specific_models

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def build_strain_specific_models(self, save_models=False):
    """Using the orthologous genes matrix, create and modify the strain specific models based on if orthologous
        genes exist.

    Also store the sequences directly in the reference GEM-PRO protein sequence attribute for the strains.

    Args:
        save_models: If true, write each strain's model as JSON and its
            GEM-PRO as a pickle into ``self.model_dir``.

    Raises:
        RuntimeError: If the orthology matrix is empty.
    """

    if len(self.df_orthology_matrix) == 0:
        raise RuntimeError('Empty orthology matrix')

    for strain_gempro in tqdm(self.strains):
        log.debug('{}: building strain specific model'.format(strain_gempro.id))

        # For each genome, load the metabolic model or genes from the reference GEM-PRO.
        # Warnings are silenced while doing so; the try/finally guarantees
        # logging is switched back on even if loading raises.
        logging.disable(logging.WARNING)
        try:
            if self._empty_reference_gempro.model:
                strain_gempro.load_cobra_model(self._empty_reference_gempro.model)
            elif self._empty_reference_gempro.genes:
                strain_gempro.genes = [x.id for x in self._empty_reference_gempro.genes]
        finally:
            logging.disable(logging.NOTSET)

        # Get a list of genes which do not have orthology in the strain
        strain_is_null = pd.isnull(self.df_orthology_matrix[strain_gempro.id])
        not_in_strain = self.df_orthology_matrix[strain_is_null][strain_gempro.id].index.tolist()

        # Mark genes non-functional
        self._pare_down_model(strain_gempro=strain_gempro, genes_to_remove=not_in_strain)

        # Load sequences into the base and strain models
        self._load_strain_sequences(strain_gempro=strain_gempro)

        if save_models:
            cobra.io.save_json_model(model=strain_gempro.model,
                                     filename=op.join(self.model_dir, '{}.json'.format(strain_gempro.id)))
            strain_gempro.save_pickle(op.join(self.model_dir, '{}_gp.pckl'.format(strain_gempro.id)))

    log.info('Created {} new strain-specific models and loaded in sequences'.format(len(self.strains)))
开发者ID:SBRG,项目名称:ssbio,代码行数:40,代码来源:atlas.py

示例11: _build_strain_specific_model

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def _build_strain_specific_model(self, strain_id, ref_functional_genes, orth_matrix, force_rerun=False):
    """Create the strain GEMPRO, mark its non-functional genes and save it as a pickle.

    A gene is marked non-functional when its row in ``orth_matrix`` is null in
    the ``strain_id`` column.

    Args:
        strain_id: Strain identifier; also the column name in ``orth_matrix``.
        ref_functional_genes: Iterable of reference gene IDs copied into the
            strain GEMPRO.
        orth_matrix: Orthology matrix (genes on the rows, strains on the columns).
        force_rerun: Passed to ``ssbio.utils.force_rerun`` together with the
            output path to decide whether the pickle is rebuilt.

    Returns:
        Tuple ``(strain_id, gp_noseqs_path)``.
    """
    gp_noseqs_path = op.join(self.model_dir, '{}_gp.pckl'.format(strain_id))

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=gp_noseqs_path):
        # Warnings are silenced while the GEMPRO is populated; the
        # try/finally guarantees logging is switched back on even if
        # construction raises.
        logging.disable(logging.WARNING)
        try:
            strain_gp = GEMPRO(gem_name=strain_id)
            # Just copy the list of genes over
            strain_gp.add_gene_ids(list(ref_functional_genes))
        finally:
            logging.disable(logging.NOTSET)

        # Get a list of genes which do not have orthology in the strain
        genes_to_remove = orth_matrix[pd.isnull(orth_matrix[strain_id])][strain_id].index.tolist()

        # Mark genes non-functional (only those actually present in the strain GEMPRO)
        strain_genes = [x.id for x in strain_gp.genes]
        for g in set(genes_to_remove).intersection(strain_genes):
            strain_gp.genes.get_by_id(g).functional = False

        strain_gp.save_pickle(outfile=gp_noseqs_path)

    return strain_id, gp_noseqs_path
开发者ID:SBRG,项目名称:ssbio,代码行数:42,代码来源:atlas2.py

示例12: __init__

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def __init__(self, seq_table, records, max_dist, min_fold, threshold_pval, log=None):
    '''
    Store the inputs, rank the sequences by abundance and start with no OTUs.

    seq_table: pandas.DataFrame
      Samples on the columns; sequences on the rows
    records: index of Bio.Seq
      Indexed, unaligned input sequences. This could come from BioPython's
      SeqIO.to_dict or SeqIO.index.
    max_dist: float
      genetic distance cutoff above which a sequence will not be merged into an OTU
    min_fold: float
      Multiply the sequence's abundance by this fold to get the minimum abundance
      of an OTU for merging
    threshold_pval: float
      P-value below which a sequence will not be merged into an OTU
    log: filehandle
      Log file reporting the abundance, genetic, and distribution checks.

    Raises RuntimeError when a sequence ID of the table is missing from records.
    '''
    self.seq_table, self.records = seq_table, records
    self.max_dist, self.min_fold = max_dist, min_fold
    self.threshold_pval, self.log = threshold_pval, log

    # total abundance per sequence, most abundant first
    totals = self.seq_table.sum(axis=1)
    self.seq_abunds = totals.sort_values(ascending=False)

    # every sequence ID in the table must also be present in the fasta
    missing_ids = []
    for seq_id in self.seq_abunds.index:
        if seq_id not in self.records:
            missing_ids.append(seq_id)
    if missing_ids:
        raise RuntimeError("{} sequence IDs found in the sequence table but not in the fasta: {}".format(len(missing_ids), missing_ids))

    # no OTUs yet
    self.otus = []
    self.membership = {}
开发者ID:thomasgurry,项目名称:amplicon_sequencing_pipeline,代码行数:37,代码来源:dbotu.py

示例13: _process_record

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def _process_record(self, record_id):
    '''
    Handle one sequence: build a candidate OTU from it, then either merge it
    into the first OTU from self.ga_matches whose distribution p-value exceeds
    self.threshold_pval, or register it as a new OTU of its own.
    '''
    assert record_id in self.seq_table.index
    record = self.records[record_id]
    logging_on = self.log is not None

    candidate = OTU(record.id, str(record.seq), self.seq_table.loc[record.id])

    if logging_on:
        print('seq', candidate.name, sep='\t', file=self.log)

    for otu in self.ga_matches(candidate):
        test_pval = candidate.distribution_pval(otu)

        if logging_on:
            print(candidate.name, 'distribution_check', otu.name, test_pval, sep='\t', file=self.log)

        if test_pval > self.threshold_pval:
            # passes the distribution check: merge and stop looking
            otu.absorb(candidate)
            self.membership[otu.name].append(candidate.name)
            break
    else:
        # nothing absorbed the candidate: it forms its own otu
        self.otus.append(candidate)
        self.membership[candidate.name] = [candidate.name]
开发者ID:thomasgurry,项目名称:amplicon_sequencing_pipeline,代码行数:32,代码来源:dbotu.py

示例14: generate_otu_table

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def generate_otu_table(self):
    '''
    Run every input sequence through _process_record (in the order of
    self.seq_abunds), then assemble the OTU table with the most abundant
    OTU first.

    returns: pandas.DataFrame
      OTU table (which can also be found at instance.otu_table)
    '''
    for record_id in self.seq_abunds.index:
        self._process_record(record_id)

    self.otus.sort(key=lambda entry: entry.abundance, reverse=True)

    names = []
    rows = []
    for entry in self.otus:
        names.append(entry.name)
        rows.append(entry.counts)

    self.otu_table = pd.DataFrame(rows, index=names)
    # label the columns with the sample names of the input table
    self.otu_table.columns = self.seq_table.columns

    return self.otu_table
开发者ID:thomasgurry,项目名称:amplicon_sequencing_pipeline,代码行数:17,代码来源:dbotu.py

示例15: read_sequence_table

# 需要导入模块: from Bio import SeqIO [as 别名]
# 或者: from Bio.SeqIO import index [as 别名]
def read_sequence_table(fn):
    '''
    Load a sequence table: a header row, sequence IDs in the first column
    and one column of counts per sample.

    fn: filename (or handle)

    returns: pandas.DataFrame
      indexed by the first column, remaining columns cast to int
    '''
    raw = pd.read_table(fn, dtype={0: str}, header=0)
    seq_ids = raw.iloc[:, 0]
    counts = raw.iloc[:, 1:].astype(int)
    counts.index = seq_ids
    return counts
开发者ID:thomasgurry,项目名称:amplicon_sequencing_pipeline,代码行数:15,代码来源:dbotu.py


注:本文中的Bio.SeqIO.index方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。