本文整理汇总了 Python 中 pyfaidx.Fasta 方法的典型用法代码示例。如果您正苦于以下问题：Python pyfaidx.Fasta 方法的具体用法？Python pyfaidx.Fasta 怎么用？Python pyfaidx.Fasta 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pyfaidx 的用法示例。
在下文中一共展示了 pyfaidx.Fasta 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self,
             gtf_file,
             fasta_file,
             intron5prime_len=100,
             intron3prime_len=100,
             transform=None,
             **kwargs):
    """Load exon definitions and attach the reference sequence source.

    Args:
        gtf_file: Path that is first opened in binary mode and unpickled.
            If that fails with ``FileNotFoundError``,
            ``pickle.UnpicklingError`` or ``ModuleNotFoundError`` the same
            path is handed to ``generate_exons`` instead.
        fasta_file: Either a path string, which is opened with
            ``Fasta(fasta_file, as_raw=False)``, or any other object, which
            is stored unchanged as the sequence source.
        intron5prime_len: First element of the ``overhang`` tuple passed to
            ``generate_exons``.
        intron3prime_len: Second element of the ``overhang`` tuple passed to
            ``generate_exons``.
        transform: Stored unchanged on ``self.transform``.
        **kwargs: Forwarded to ``generate_exons``.
    """
    try:
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # gtf_file at pickles from a trusted source.
        with open(gtf_file, 'rb') as f:
            self.exons = pickle.load(f)
    except (FileNotFoundError, pickle.UnpicklingError, ModuleNotFoundError):
        # Not a readable pickle: build the exons from the file itself.
        self.exons = generate_exons(gtf_file=gtf_file,
                                    overhang=(intron5prime_len,
                                              intron3prime_len),
                                    **kwargs)

    if isinstance(fasta_file, str):
        self.fasta = Fasta(fasta_file, as_raw=False)
    else:
        # Previously this path hit an unbound local; a non-string argument
        # is now used directly as the sequence source.
        self.fasta = fasta_file
    self.transform = transform
示例2: generate_gap_bed
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def generate_gap_bed(fname, outname):
    """Generate a BED file with gap locations.

    Each run of one or more uppercase ``N`` characters in a sequence is
    written as one tab-separated line: sequence name, match start, match end.

    Parameters
    ----------
    fname : str
        Filename of input FASTA file.
    outname : str
        Filename of output BED file.
    """
    gap_pattern = re.compile(r"N+")
    genome = Fasta(fname)
    try:
        with open(outname, "w") as bed:
            for chrom in genome.keys():
                for gap in gap_pattern.finditer(genome[chrom][:].seq):
                    bed.write(f"{chrom}\t{gap.start(0)}\t{gap.end(0)}\n")
    finally:
        # Release the FASTA handle even if reading or writing fails.
        genome.close()
示例3: sequence_from_coords
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def sequence_from_coords(fastafile, coords):
    """Fetch a sequence string from a FASTA file by genome coordinates.

    Args:
        fastafile: input fasta file
        coords: genome coordinates as a tuple of the form
            (chrom, start, end, strand)

    Returns:
        The sequence as a string, reverse-complemented when strand is '-'.
        None when the file does not exist or when the lookup raises (the
        problem is printed in both cases).
    """
    if not os.path.exists(fastafile):
        print ('no such file')
        return None

    chrom, start, end, strand = coords
    from pyfaidx import Fasta
    genome = Fasta(fastafile)
    try:
        # Slice the requested interval out of the genome.
        sequence = str(genome[chrom][start:end])
        if strand == '-':
            raw = HTSeq.Sequence(sequence.encode())
            sequence = str(raw.get_reverse_complement())
    except Exception as err:
        print (err)
        return None
    else:
        return sequence
示例4: lengths
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def lengths(self):
    """
    Return the mapping of sequence names to sequence lengths.

    The mapping is filled from the FASTA file on the first call (while it
    is still empty) and returned as-is afterwards.

    :return: a dictionary which keys are sequence names and
        values are their lengths
    :rtype: dict
    """
    if self.__lengths:
        return self.__lengths

    total_length = 0
    reader = pyfaidx.Fasta(self.__filename)
    for name in reader.keys():
        seq_len = len(reader[name])
        self.__lengths[name] = seq_len
        total_length += seq_len
    logger.debug('%d sequences analyzed with the total length of '
                 '%d bp', len(self.__lengths), total_length)
    return self.__lengths
示例5: fetch_seq
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def fetch_seq(chrom=str, start=int, stop=int, strand=str, genome=pyfaidx.Fasta,
              indexing = 0):
    """Return the sequence of a genomic interval as a string.

    The interval is sliced as genome[chrom][start:stop]; for strand "-" the
    reverse complement is returned. With indexing=1 the start position is
    decremented by one before slicing; indexing=0 leaves it untouched.

    Note that the parameter defaults are type objects, so every argument
    except `indexing` has to be supplied by the caller in practice.

    Raises:
        ValueError: if start is greater than stop, or if indexing is
            neither 0 nor 1.
    """
    if start > stop:
        raise ValueError("Start must be less than or equal to stop")

    if indexing == 1:
        start -= 1
    elif indexing != 0:
        raise ValueError("Valid indexing modes include: 1 or 0")

    region = genome[chrom][start:stop]
    return str(region.reverse.complement if strand == "-" else region)
示例6: test_get_frac_minus_strand
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def test_get_frac_minus_strand():
    """Check the fraction of As after a minus-strand transcript end.

    Toy layout (column alignment was lost in this copy of the file):
        1 5 10 15 20 25
        ACTGACTGACTGAAATAAGAAACTGACTG
        TGACTGACTGACTTTATTCTTTGACTGAC
        ACTTTATTCTTT (-)
    With range_size set to 8, we expect to extract seq "GTCAGTCA".
    The correct fraction of As is therefore 2/8.
    """
    genome = pyfaidx.Fasta("talon_label_reads/test_inputs/toy_genome.fa",
                           sequence_always_upper=True,
                           one_based_attributes=False)
    observed = tlr.compute_frac_as_after_transcript("chrTest1", 11, '-', 8,
                                                    genome)
    expected = 2.0/8
    assert observed == expected
示例7: __init__
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, path, debug=False):
    """Open the FASTA file and record its chromosome names and lengths.

    Args:
        path: Location of the FASTA file; stored on ``self.path`` and
            opened with ``pyfaidx.Fasta(path, as_raw=True)``.
        debug: Stored unchanged on ``self.debug``.

    Three ordered mappings are filled, in the order the FASTA reports its
    keys:
        chroms_to_std_chroms: FASTA name -> ``standardize_chrom`` result
        std_chroms_to_chroms: ``standardize_chrom`` result -> FASTA name
        chrom_lengths: FASTA name -> length of that sequence
    """
    self.path = path
    self.fasta = pyfaidx.Fasta(path, as_raw=True)
    self.debug = debug

    self.chroms_to_std_chroms = collections.OrderedDict()
    self.std_chroms_to_chroms = collections.OrderedDict()
    self.chrom_lengths = collections.OrderedDict()

    # Every sequence in the file is registered; no name filter is applied.
    for chrom in self.fasta.keys():
        std_chrom = self.standardize_chrom(chrom)
        self.chroms_to_std_chroms[chrom] = std_chrom
        self.std_chroms_to_chroms[std_chrom] = chrom
        self.chrom_lengths[chrom] = len(self.fasta[chrom])
示例8: __init__
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, _bamfiles, _reffile):
    """Open the BAM readers and the optional reference FASTA.

    Args:
        _bamfiles: Iterable of BAM file paths. Each one is opened with
            ``pysam.Samfile(path, "rb")``; a path that cannot be opened is
            reported on stderr and skipped.
        _reffile: Path of the reference FASTA, or "" for no reference. If
            opening it fails, ``self.reference`` is set to None.

    Also initialises ``self.alignment_grid`` to None and fills
    ``self.read_groups`` from ``self.LoadRGDictionary()``.
    """
    self.bamfiles = _bamfiles
    self.bamreaders = []
    for bam in self.bamfiles:
        # Best effort per file, but let KeyboardInterrupt/SystemExit through.
        try:
            br = pysam.Samfile(bam, "rb")
            self.bamreaders.append(br)
        except Exception:
            sys.stderr.write("ERROR: could not open %s. Is this a valid bam file?\n"%bam)

    if _reffile != "":
        try:
            self.reference = pyfaidx.Fasta(_reffile, as_raw=True)
        except Exception:
            # An unreadable reference is treated the same as no reference.
            self.reference = None
    else:
        self.reference = None

    self.alignment_grid = None
    self.read_groups = self.LoadRGDictionary()
示例9: main
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def main():
    """Parse the command-line options and run the off-target comparison.

    All parsed values are handed positionally to ``compare``. ``--control``
    is a required option, so the comparison against the control BAM is
    always what runs.
    """
    parser = argparse.ArgumentParser(
        description='Identify off-target candidates from Illumina short read sequencing data.')

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = [
        ('--ref', dict(help='Reference Genome Fasta', required=True)),
        ('--bam', dict(help='Sorted BAM file', required=True)),
        ('--control', dict(help='Control BAM file', required=True)),
        ('--targetsite', dict(help='Targetsite Sequence', required=True)),
        ('--search_radius', dict(help='Search radius around the position window', default=20, type=int)),
        ('--windowsize', dict(help='Windowsize', default=3, type=int)),
        ('--mapq', dict(help='mapq threshold', default=50, type=int)),
        ('--gap', dict(help='Gap threshold', default=3, type=int)),
        ('--start', dict(help='Start threshold', default=1, type=int)),
        ('--mismatch_threshold', dict(help='Maximum score threshold', default=6, type=int)),
        ('--merged', dict(dest='merged', action='store_true', default=True)),
        ('--all_chromosomes', dict(dest='all_chromosomes', action='store_true', default=False)),
        ('--name', dict(help='Targetsite Name', required=False)),
        ('--cells', dict(help='Cells', required=False)),
        ('--out', dict(help='Output file base', required=True)),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args = parser.parse_args()

    # Report which BAM files are being compared before starting the run.
    print("Nuclease: {0}\nControl: {1}".format(args.bam, args.control), file=sys.stderr)
    compare(
        args.ref,
        args.bam,
        args.control,
        args.targetsite,
        args.search_radius,
        args.windowsize,
        args.mapq,
        args.gap,
        args.start,
        args.mismatch_threshold,
        args.name,
        args.cells,
        args.out,
        args.all_chromosomes,
        args.merged,
    )
示例10: __init__
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, region, fasta=None):
    """Parse a window specification into chromosome, begin, end and size.

    Args:
        region: Either "chrom:begin-end" (commas are stripped before
            parsing) or a bare chromosome/contig/transcript name.
        fasta: Reference FASTA path; required for the bare-name form, where
            the end of the window is the length of that sequence.

    Exits the program via ``sys.exit`` when the interval cannot be parsed,
    or when a bare name is given without a FASTA file.
    """
    if ':' not in region:
        # Region is an entire chromosome, contig or transcript.
        if fasta is None:
            sys.exit("A fasta reference file is required if --window "
                     "is an entire chromosome, contig or transcript")
        self.chromosome = region
        self.begin = 0
        self.string = region
        self.end = len(Fasta(fasta)[region])
        self.size = self.end
        return

    try:
        self.chromosome, interval = region.replace(',', '').split(':')
        self.begin, self.end = [int(pos) for pos in interval.split('-')]
    except ValueError:
        sys.exit("\n\nERROR: Window (-w/--window) inproperly formatted, "
                 "examples of accepted formats are:\n"
                 "'chr5:150200605-150423790' or 'ENST00000647408'\n\n")
    self.size = self.end - self.begin
    self.string = "{}_{}_{}".format(self.chromosome, self.begin, self.end)
示例11: getMobileElementFasta
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def getMobileElementFasta(dataHub):
    """Return the "repeats" source of dataHub, creating it on first use.

    When dataHub.sources has no "repeats" entry yet, one is built with
    genomesource.GenomeSource from dataHub.args.fasta and stored there.
    """
    sources = dataHub.sources
    if "repeats" not in sources:
        sources["repeats"] = genomesource.GenomeSource(dataHub.args.fasta)
    return sources["repeats"]
示例12: fasta
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def fasta(self):
    """Return the pyfaidx handle for self.path, opening it on first use.

    The handle is kept on self._fasta, so the file is opened only while
    that attribute is still None.
    """
    if self._fasta is not None:
        return self._fasta
    self._fasta = pyfaidx.Fasta(self.path, as_raw=True)
    return self._fasta
示例13: load_fasta_sequences
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def load_fasta_sequences(fasta_file, return_keys=False):
    """
    Read a FASTA file and return the list of its sequences.

    The file is opened with as_raw=True and sequence_always_upper=True,
    every record is sliced in full, and the handle is closed before
    returning. With return_keys=True the result is a (sequences, keys)
    tuple, where keys is the list of record names.
    """
    handle = Fasta(fasta_file, as_raw=True, sequence_always_upper=True)
    sequences = [record[:] for record in handle]

    if not return_keys:
        handle.close()
        return sequences

    names = list(handle.keys())
    handle.close()
    return sequences, names
示例14: read_pep_fa
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def read_pep_fa(protein_file):
    """Parse a protein FASTA file into a pandas DataFrame, one row per record.

    Each record's long name is split on spaces at most 8 times. The first
    token becomes "protein_id", the second "protein_type", and every later
    token is split on its first ":" into a column name and value. The
    sequence itself is stored as a string in the "seq" column.
    """
    import pandas as pd

    proteins = Fasta(str(protein_file))
    rows = []
    for entry in proteins:
        tokens = entry.long_name.split(" ", 8)
        row = {"protein_id": tokens[0], 'protein_type': tokens[1]}
        # Remaining header tokens look like "key:value".
        row.update(token.split(":", 1) for token in tokens[2:])
        row['seq'] = str(proteins[entry.name])
        rows.append(row)
    return pd.DataFrame(rows)
示例15: __init__
# 需要导入模块: import pyfaidx [as 别名]
# 或者: from pyfaidx import Fasta [as 别名]
def __init__(self, fasta_file, use_strand=False, force_upper=False):
    """Open fasta_file with pyfaidx and remember the extraction options.

    Args:
        fasta_file: Path of the FASTA file; kept on self.fasta_file and
            opened as self.fasta.
        use_strand: Stored on self._use_strand.
        force_upper: Stored on self.force_upper.
    """
    from pyfaidx import Fasta

    self.fasta_file = fasta_file
    self.fasta = Fasta(fasta_file)
    self._use_strand = use_strand
    self.force_upper = force_upper