当前位置: 首页>>代码示例>>Python>>正文


Python pyfaidx.Fasta 类代码示例

本文整理汇总了 Python 中 pyfaidx.Fasta 类的典型用法代码示例。如果您想了解 pyfaidx.Fasta 的具体用法、调用方式或使用示例,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 pyfaidx 的其他用法示例。


下文共展示了 15 个 pyfaidx.Fasta 的代码示例,默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Python 代码示例。

示例1: __init__

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self,
                 gtf_file,
                 fasta_file,
                 intron5prime_len=100,
                 intron3prime_len=100,
                 transform=None,
                 **kwargs):
        """Load exon definitions and attach the reference sequence.

        The exons are first read from ``gtf_file`` as a pickle. If that
        fails with one of the caught errors, they are built from the same
        path with ``generate_exons``.

        Args:
            gtf_file: path that is opened in binary mode and unpickled; on
                failure it is handed to ``generate_exons`` instead.
            fasta_file: either a path string, which is opened with
                ``Fasta(fasta_file, as_raw=False)``, or any other object,
                which is stored unchanged (assumed to be an already opened
                Fasta-like object -- TODO confirm with callers).
            intron5prime_len: first element of the ``overhang`` tuple
                passed to ``generate_exons``.
            intron3prime_len: second element of the ``overhang`` tuple
                passed to ``generate_exons``.
            transform: stored on ``self.transform`` as given.
            **kwargs: forwarded to ``generate_exons``.
        """
        try:
            # NOTE(security): pickle.load can execute arbitrary code, so
            # gtf_file must only ever point at trusted files.
            with open(gtf_file, 'rb') as f:
                self.exons = pickle.load(f)
        except (FileNotFoundError, pickle.UnpicklingError, ModuleNotFoundError):
            # Not a readable pickle: treat the path as an annotation file.
            self.exons = generate_exons(gtf_file=gtf_file,
                                        overhang=(intron5prime_len,
                                                  intron3prime_len),
                                        **kwargs)
        import six
        if isinstance(fasta_file, six.string_types):
            fasta = Fasta(fasta_file, as_raw=False)
        else:
            # Previously `fasta` stayed unbound here and the assignment
            # below raised UnboundLocalError for any non-string input.
            fasta = fasta_file
        self.fasta = fasta
        self.transform = transform
开发者ID:kipoi,项目名称:kipoiseq,代码行数:23,代码来源:splicing.py

示例2: generate_gap_bed

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def generate_gap_bed(fname, outname):
    """ Generate a BED file with gap locations.

    Every run of one or more uppercase ``N`` characters in each sequence
    is written as one tab-separated line ``<name> <start> <end>``, using
    the start/end offsets reported by the regular-expression match.

    Parameters
    ----------
    fname : str
        Filename of input FASTA file.

    outname : str
        Filename of output BED file.
    """
    f = Fasta(fname)
    try:
        with open(outname, "w") as bed:
            for chrom in f.keys():
                for m in re.finditer(r"N+", f[chrom][:].seq):
                    bed.write(f"{chrom}\t{m.start(0)}\t{m.end(0)}\n")
    finally:
        # Release the FASTA handle even when reading or writing fails;
        # the original never closed it.
        f.close()
开发者ID:vanheeringen-lab,项目名称:genomepy,代码行数:18,代码来源:utils.py

示例3: sequence_from_coords

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def sequence_from_coords(fastafile, coords):
    """Return the sequence found at the given genome coordinates.

    Args:
        fastafile: path of the input fasta file
        coords: genome coordinates as a tuple of the form
                (chrom, start, end, strand)

    Returns:
        The sequence as a string, reverse-complemented when strand is
        '-'. None is returned (after printing a message) when the file
        does not exist or when extracting the sequence raises.
    """
    if os.path.exists(fastafile):
        chrom, start, end, strand = coords
        from pyfaidx import Fasta
        genome = Fasta(fastafile)
        try:
            # slice the requested interval out of the genome
            fragment = str(genome[chrom][start:end])
            if strand == '-':
                revcomp = HTSeq.Sequence(fragment.encode()).get_reverse_complement()
                fragment = str(revcomp)
        except Exception as err:
            # best effort: report the problem and give the caller nothing
            print(err)
            return None
        return fragment

    print('no such file')
    return None
开发者ID:dmnfarrell,项目名称:smallrnaseq,代码行数:25,代码来源:utils.py

示例4: lengths

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def lengths(self):
        """
        Return a dictionary of sequence lengths.

        The dictionary is filled from the FASTA file on the first call
        and reused afterwards. The number of sequences and their total
        length are written to the debug log on every call.

        :return: a dictionary which keys are sequence names and
            values are their lengths
        :rtype: dict
        """
        if not self.__lengths:
            reader = pyfaidx.Fasta(self.__filename)
            for seq in reader.keys():
                self.__lengths[seq] = len(reader[seq])

        # Sum the cached values instead of accumulating while reading:
        # the old counter stayed at 0 whenever the cache was already
        # filled, so repeated calls logged a total length of 0 bp.
        total_length = sum(self.__lengths.values())

        logger.debug('%d sequences analyzed with the total length of '
                     '%d bp', len(self.__lengths), total_length)

        return self.__lengths
开发者ID:gtamazian,项目名称:chromosomer,代码行数:21,代码来源:fragment.py

示例5: fetch_seq

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def fetch_seq(chrom=str, start=int, stop=int, strand=str, genome=pyfaidx.Fasta,
              indexing=0):
    """ Return the sequence of a genomic interval as a string, oriented
        according to the supplied strand.

        The interval is taken as genome[chrom][start:stop]. With
        indexing == 1 the start is decremented by one before slicing
        (stop is used unchanged); with indexing == 0 both are used as
        given. For strand "-" the reverse complement is returned.

        Raises ValueError when start > stop or when indexing is neither
        0 nor 1. """

    if start > stop:
        raise ValueError("Start must be less than or equal to stop")

    if indexing == 1:
        start -= 1
    elif indexing != 0:
        raise ValueError("Valid indexing modes include: 1 or 0")

    interval = genome[chrom][start:stop]
    if strand == "-":
        return str(interval.reverse.complement)
    return str(interval)
开发者ID:mortazavilab,项目名称:TALON,代码行数:24,代码来源:talon_label_reads.py

示例6: test_get_frac_minus_strand

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def test_get_frac_minus_strand():
    """
        Minus-strand case on the toy genome:

          1   5    10   15   20   25
          ACTGACTGACTGAAATAAGAAACTGACTG
          TGACTGACTGACTTTATTCTTTGACTGAC
                    ACTTTATTCTTT  (-)

        With a range size of 8 the extracted sequence should be
        "GTCAGTCA", which contains two As, so the expected fraction
        is 2/8.
    """
    genome = pyfaidx.Fasta("talon_label_reads/test_inputs/toy_genome.fa",
                           sequence_always_upper=True,
                           one_based_attributes=False)
    expected = 2.0/8
    observed = tlr.compute_frac_as_after_transcript("chrTest1", 11, '-', 8,
                                                    genome)

    assert observed == expected
开发者ID:mortazavilab,项目名称:TALON,代码行数:18,代码来源:test_compute_frac_as_after_transcript.py

示例7: __init__

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, path, debug=False):
        """Open the FASTA at ``path`` and index its sequence names.

        For every key of the FASTA three ordered mappings are filled:
        original name -> standardized name, standardized name ->
        original name, and original name -> sequence length.
        """
        self.path = path
        self.fasta = pyfaidx.Fasta(path, as_raw=True)
        self.debug = debug

        self.chroms_to_std_chroms = collections.OrderedDict()
        self.std_chroms_to_chroms = collections.OrderedDict()
        self.chrom_lengths = collections.OrderedDict()

        # Pattern for numbered / X / Y chromosome names. It is not
        # applied at the moment, so every sequence in the file is kept.
        chrom_re = r"(chr)?((\d+)|(X)|(Y))$"

        for name in self.fasta.keys():
            canonical = self.standardize_chrom(name)
            self.chroms_to_std_chroms[name] = canonical
            self.std_chroms_to_chroms[canonical] = name
            self.chrom_lengths[name] = len(self.fasta[name])
开发者ID:grocsvs,项目名称:grocsvs,代码行数:19,代码来源:reference.py

示例8: __init__

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, _bamfiles, _reffile):
        """Open the given BAM files and, if possible, the reference.

        :param _bamfiles: iterable of BAM paths. Each one is opened with
            ``pysam.Samfile(bam, "rb")``; a file that cannot be opened is
            reported on stderr and skipped.
        :param _reffile: reference FASTA path. An empty string means no
            reference. If opening fails, ``self.reference`` is ``None``.
        """
        self.bamfiles = _bamfiles
        self.bamreaders = []
        for bam in self.bamfiles:
            # `except Exception` instead of a bare `except` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            try:
                br = pysam.Samfile(bam, "rb")
            except Exception:
                sys.stderr.write("ERROR: could not open %s. Is this a valid bam file?\n"%bam)
            else:
                self.bamreaders.append(br)
        self.reference = None
        if _reffile != "":
            try:
                self.reference = pyfaidx.Fasta(_reffile, as_raw=True)
            except Exception:
                # Deliberate best effort: keep going without a reference.
                self.reference = None
        self.alignment_grid = None
        self.read_groups = self.LoadRGDictionary()
开发者ID:mgymrek,项目名称:pybamview,代码行数:19,代码来源:bam_alignment.py

示例9: main

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def main():
    """Parse the command line and run ``compare`` on the given inputs.

    The nuclease and control BAM paths are echoed to stderr before the
    comparison starts.
    """
    # One entry per command-line option, in the order they are registered.
    option_table = [
        ('--ref', {'help': 'Reference Genome Fasta', 'required': True}),
        ('--bam', {'help': 'Sorted BAM file', 'required': True}),
        ('--control', {'help': 'Control BAM file', 'required': True}),
        ('--targetsite', {'help': 'Targetsite Sequence', 'required': True}),
        ('--search_radius', {'help': 'Search radius around the position window',
                             'default': 20, 'type': int}),
        ('--windowsize', {'help': 'Windowsize', 'default': 3, 'type': int}),
        ('--mapq', {'help': 'mapq threshold', 'default': 50, 'type': int}),
        ('--gap', {'help': 'Gap threshold', 'default': 3, 'type': int}),
        ('--start', {'help': 'Start threshold', 'default': 1, 'type': int}),
        ('--mismatch_threshold', {'help': 'Maximum score threshold',
                                  'default': 6, 'type': int}),
        # NOTE: store_true combined with default=True means this value is
        # True whether or not the flag is passed.
        ('--merged', {'dest': 'merged', 'action': 'store_true', 'default': True}),
        ('--all_chromosomes', {'dest': 'all_chromosomes', 'action': 'store_true',
                               'default': False}),
        ('--name', {'help': 'Targetsite Name', 'required': False}),
        ('--cells', {'help': 'Cells', 'required': False}),
        ('--out', {'help': 'Output file base', 'required': True}),
    ]

    parser = argparse.ArgumentParser(description='Identify off-target candidates from Illumina short read sequencing data.')
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    args = parser.parse_args()

    # --control is required, so the nuclease-vs-control comparison always runs.
    print("Nuclease: {0}\nControl: {1}".format(args.bam, args.control), file=sys.stderr)
    compare(args.ref, args.bam, args.control, args.targetsite,
            args.search_radius, args.windowsize, args.mapq, args.gap,
            args.start, args.mismatch_threshold, args.name, args.cells,
            args.out, args.all_chromosomes, args.merged)
开发者ID:tsailabSJ,项目名称:circleseq,代码行数:25,代码来源:findCleavageSites.py

示例10: __init__

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, region, fasta=None):
        """Build a region from a window string.

        ``region`` is either ``<name>:<begin>-<end>`` (commas are
        stripped before parsing) or a bare sequence name. For a bare
        name the region spans the whole sequence, whose length is read
        from the ``fasta`` file; the process exits if ``fasta`` is None.
        A malformed interval also exits the process with a message.
        """
        if ':' not in region:
            # The region is an entire chromosome, contig or transcript.
            if fasta is None:
                sys.exit("A fasta reference file is required if --window "
                         "is an entire chromosome, contig or transcript")
            self.chromosome = region
            self.begin = 0
            self.string = region
            self.end = len(Fasta(fasta)[region])
            self.size = self.end
            return

        try:
            self.chromosome, interval = region.replace(',', '').split(':')
            self.begin, self.end = map(int, interval.split('-'))
        except ValueError:
            sys.exit("\n\nERROR: Window (-w/--window) inproperly formatted, "
                     "examples of accepted formats are:\n"
                     "'chr5:150200605-150423790' or 'ENST00000647408'\n\n")
        self.size = self.end - self.begin
        self.string = "{}_{}_{}".format(self.chromosome, self.begin, self.end)
开发者ID:wdecoster,项目名称:methplotlib,代码行数:23,代码来源:utils.py

示例11: getMobileElementFasta

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def getMobileElementFasta(dataHub):
    """Return the "repeats" source of dataHub, creating it on first use.

    When dataHub.sources has no "repeats" entry yet, one is created as a
    genomesource.GenomeSource built from dataHub.args.fasta and stored
    under that key.
    """
    sources = dataHub.sources
    if "repeats" not in sources:
        sources["repeats"] = genomesource.GenomeSource(dataHub.args.fasta)
    return sources["repeats"]
开发者ID:svviz,项目名称:svviz,代码行数:7,代码来源:vcf.py

示例12: fasta

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def fasta(self):
        """Return the pyfaidx handle for self.path, opening it on first use.

        The handle is stored in self._fasta and reused while that
        attribute is not None.
        """
        if self._fasta is not None:
            return self._fasta
        self._fasta = pyfaidx.Fasta(self.path, as_raw=True)
        return self._fasta
开发者ID:svviz,项目名称:svviz,代码行数:6,代码来源:genomesource.py

示例13: load_fasta_sequences

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def load_fasta_sequences(fasta_file, return_keys=False):
    """
    Reads a FASTA file and returns its sequences.

    The file is opened with ``as_raw=True`` and
    ``sequence_always_upper=True`` and every record is sliced in full.

    :param fasta_file: FASTA file to read
    :param return_keys: when true, also return the list of sequence keys
    :return: the list of sequences, or ``(sequences, keys)`` when
        ``return_keys`` is true
    """
    fasta = Fasta(fasta_file, as_raw=True, sequence_always_upper=True)
    try:
        seqs = [seq[:] for seq in fasta]
        keys = list(fasta.keys()) if return_keys else None
    finally:
        # Close the handle even if reading a record fails; previously it
        # was only closed on the success path.
        fasta.close()
    if return_keys:
        return seqs, keys
    return seqs
开发者ID:daquang,项目名称:YAMDA,代码行数:14,代码来源:sequences.py

示例14: read_pep_fa

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def read_pep_fa(protein_file):
    """Read a protein FASTA file into a pandas DataFrame, one row per record.

    Each record's long name is split on spaces into at most nine fields:
    the first becomes ``protein_id``, the second ``protein_type``, and
    every remaining field is split once on ":" into a column name and
    its value. The sequence itself is stored in the ``seq`` column.
    (The header layout looks like Ensembl peptide FASTA -- confirm
    against the input files.)
    """
    import pandas as pd
    proteins = Fasta(str(protein_file))
    rows = []
    for record in proteins:
        fields = record.long_name.split(" ", 8)
        row = {"protein_id": fields[0], 'protein_type': fields[1]}
        # remaining header fields are "key:value" pairs
        row.update(dict(item.split(":", 1) for item in fields[2:]))
        row['seq'] = str(proteins[record.name])
        rows.append(row)
    return pd.DataFrame(rows)
开发者ID:kipoi,项目名称:kipoiseq,代码行数:13,代码来源:test_translation.py

示例15: __init__

# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, fasta_file, use_strand=False, force_upper=False):
        """Store the options and open ``fasta_file`` with pyfaidx.

        ``use_strand`` is kept as ``self._use_strand`` and ``force_upper``
        as ``self.force_upper``; this method only stores them.
        """
        # imported here rather than at module level
        from pyfaidx import Fasta

        self.fasta_file = fasta_file
        self._use_strand = use_strand
        self.force_upper = force_upper
        self.fasta = Fasta(fasta_file)
开发者ID:kipoi,项目名称:kipoiseq,代码行数:9,代码来源:fasta.py


注:本文中的pyfaidx.Fasta方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。