本文整理汇总了Python中pysam.FastaFile类的典型用法代码示例。如果您正苦于以下问题:Python pysam.FastaFile类的具体用法?Python pysam.FastaFile怎么用?Python pysam.FastaFile使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pysam的用法示例。
在下文中一共展示了pysam.FastaFile类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_data
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_data(fasta_file, region=None):
    """Return FASTA content: the whole file, or the sequence of one region.

    Args:
        fasta_file: Path to the FASTA file.
        region: ``None`` to read the entire file as text. Otherwise a mapping
            with ``"chr"``, ``"start"`` and ``"end"`` keys, or a string that
            ``regions.parse_region`` converts into such a mapping.

    Returns:
        The raw file text when ``region`` is ``None``; otherwise the string
        returned by ``FastaFile.fetch`` for the requested region.
    """
    if region is None:
        with open(fasta_file, "r") as f:
            return f.read()

    if isinstance(region, str):
        region = regions.parse_region(region)
    chrom = region["chr"]
    # The start is shifted down by one before fetching; presumably regions
    # are 1-based inclusive while fetch() is 0-based half-open -- confirm.
    start = region["start"] - 1
    end = region["end"]
    # The context manager closes the handle even when fetch() raises.
    with pysam.FastaFile(fasta_file) as fasta:
        return fasta.fetch(chrom, start, end)
示例2: extract_fasta_to_file
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def extract_fasta_to_file(fasta, output_dir, mode="bcolz", overwrite=False):
    """One-hot encode each sequence of a FASTA file and write it to disk.

    For every reference in the FASTA file a ``(length, NUM_SEQ_CHARS)``
    float32 array is filled by ``one_hot_encode_sequence`` and handed to the
    writer registered under ``mode``. A ``metadata.json`` file recording the
    array shapes, the storage type and the source path is written alongside.

    Args:
        fasta: Path to the FASTA file; also stored as ``"source"`` in the
            metadata.
        output_dir: Directory that receives one entry per reference plus
            ``metadata.json``.
        mode: Key into ``_array_writer`` selecting the writer to use.
        overwrite: Passed as ``exist_ok`` when creating ``output_dir``.

    Raises:
        AssertionError: If ``mode`` is not a key of ``_array_writer``.
    """
    # Kept as an assert so callers see the same exception type as before.
    assert mode in _array_writer
    makedirs(output_dir, exist_ok=overwrite)
    file_shapes = {}
    # The context manager releases the FASTA handle once encoding is done
    # (the handle was previously left open).
    with FastaFile(fasta) as fasta_file:
        for chrom, size in zip(fasta_file.references, fasta_file.lengths):
            data = np.zeros((size, NUM_SEQ_CHARS), dtype=np.float32)
            seq = fasta_file.fetch(chrom)
            one_hot_encode_sequence(seq, data)
            file_shapes[chrom] = data.shape
            _array_writer[mode](data, os.path.join(output_dir, chrom))
    with open(os.path.join(output_dir, "metadata.json"), "w") as fp:
        json.dump(
            {
                "file_shapes": file_shapes,
                "type": "array_{}".format(mode),
                "source": fasta,
            },
            fp,
        )
示例3: reverse_bed
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def reverse_bed():
    """Convert bed-file coordinates to coordinates on the reverse strand.

    Command-line entry point: reads ``bed_in``, ``ref_fasta`` and ``bed_out``
    from the process arguments. Each interval ``[start, stop)`` on ``chrom``
    is written as ``[length - stop, length - start)`` on ``<chrom>_rc``,
    where ``length`` is the reference length reported by the FASTA index.

    Raises:
        KeyError: If the bed file names a chromosome absent from the FASTA.
    """
    parser = argparse.ArgumentParser(
        prog='reverse_bed',
        description='Convert bed-file coordinates to coordinates on the reverse strand.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('bed_in', help='Input bed file.')
    parser.add_argument('ref_fasta', help='Input reference fasta file.')
    parser.add_argument('bed_out', help='Output bed file.')
    args = parser.parse_args()

    # Only the name -> length table is needed, so the handle is closed as
    # soon as it has been read (it was previously left open).
    with pysam.FastaFile(args.ref_fasta) as fasta:
        lengths = dict(zip(fasta.references, fasta.lengths))

    d = pd.read_csv(args.bed_in, sep='\t', names=['chrom', 'start', 'stop'])
    # A callable (not the dict itself) keeps unknown chromosomes a hard error.
    d['chrom_length'] = d['chrom'].map(lambda x: lengths[x])
    d['rc_stop'] = d['chrom_length'] - d['start']
    d['rc_start'] = d['chrom_length'] - d['stop']
    d['chrom_rc'] = d['chrom'] + '_rc'
    d[['chrom_rc', 'rc_start', 'rc_stop']].to_csv(args.bed_out, index=False, header=False, sep='\t')
示例4: get_gc_content
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_gc_content(regions, fasta):
    """Get GC content from regions in fasta.

    Args:
        regions: Iterable of objects exposing ``chrom``, ``start`` and ``end``.
        fasta: Path to the FASTA file the regions refer to.

    Returns:
        The summed GC weight of all fetched bases divided by the summed
        region widths (``end - start``), as a float.

    Raises:
        ZeroDivisionError: If the regions have a combined width of zero
            (including when ``regions`` is empty).
    """
    # Per-base GC weight; any character not listed here counts as 0.5.
    nuc_count = {"T": 0, "t": 0, "A": 0, "a": 0, "G": 1, "g": 1, "C": 1, "c": 1}
    gc = 0
    total = 0
    # The context manager closes the handle even when a fetch() fails.
    with pysam.FastaFile(fasta) as fasta_obj:
        for region in regions:
            seq = fasta_obj.fetch(region.chrom, region.start, region.end)
            gc += sum(nuc_count.get(nuc, 0.5) for nuc in seq)
            total += region.end - region.start
    gc_content = gc / float(total)
    return gc_content
#---------------------------------------------------------------------------------------------------------#
#------------------------------------------- Main functions ----------------------------------------------#
#---------------------------------------------------------------------------------------------------------#
示例5: test_neg_in_serialise
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def test_neg_in_serialise(self):
    """A minus-strand Prodigal CDS line must yield a BED12 not flagged invalid."""
    # The continuation lines below start at column 0 on purpose: leading
    # whitespace would become part of the string literal.
    raw_gff = "tr_c114_g1_i1.mrna1.89\tProdigal_v2.6.3\tCDS\t2\t205\t28.7\t-\t0\t\
ID=85_1;partial=11;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.407;\
conf=99.86;score=28.71;cscore=27.10;sscore=1.61;rscore=0.00;uscore=0.00;tscore=1.61;"
    gff_line = GFF.GffLine(raw_gff)
    self.assertFalse(gff_line.header)
    self.assertIsNotNone(gff_line.id)

    test_logger = create_default_logger("test_neg_in_serialise", "DEBUG")
    fasta_path = pkg_resources.resource_filename("Mikado.tests", "mikado_prepared.fasta")
    fasta_index = pysam.FastaFile(fasta_path)
    record = bed12.BED12(
        gff_line,
        logger=test_logger,
        max_regression=0.1,
        start_adjustment=True,
        fasta_index=fasta_index,
        transcriptomic=True,
    )
    self.assertFalse(record.invalid, record.invalid_reason)
示例6: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, fasta_file):
    """Open ``fasta_file`` with pysam and keep the handle on ``self.f``."""
    handle = pysam.FastaFile(fasta_file)
    self.f = handle
示例7: fasta
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def fasta(self):
    """Return the FastaFile handle for ``self.path``, opening it on first use.

    The handle is stored on ``self._fasta`` and returned unchanged by later
    calls.
    """
    if self._fasta is not None:
        return self._fasta

    # Imported here rather than at module level, so pysam is only loaded
    # once a handle is actually requested.
    import pysam

    self._fasta = pysam.FastaFile(self.path)
    return self._fasta
示例8: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, referenceSetId, fastaFile):
    """Initialise the base class, then open ``fastaFile`` with pysam.

    Both arguments are forwarded unchanged to the parent initialiser; the
    opened handle is stored on ``self._fastaFile``.
    """
    super(ReferenceSetTest, self).__init__(referenceSetId, fastaFile)
    handle = pysam.FastaFile(fastaFile)
    self._fastaFile = handle
示例9: openFile
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def openFile(self, dataFile):
    """Open ``dataFile`` as a ``pysam.FastaFile`` and return the handle."""
    handle = pysam.FastaFile(dataFile)
    return handle
示例10: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, datafile, use_strand=False, **kwargs):
    """Fasta file extractor.

    NOTE: The extractor is not thread-safe.
    If you wish to use it with multiprocessing,
    create a new extractor object in each process.

    Args:
        datafile (str): path to the FASTA file (forwarded to the parent
            initialiser together with ``**kwargs``)
        use_strand (bool): if True, the extracted sequence
            is reverse complemented in case interval.strand == "-"
    """
    super(FastaExtractor, self).__init__(datafile, **kwargs)
    self.use_strand = use_strand
    # ``self._datafile`` is presumably set by the parent initialiser from
    # ``datafile`` -- confirm against the base class.
    handle = FastaFile(self._datafile)
    self.fasta = handle
示例11: read_chrom_sizes_from_fasta
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def read_chrom_sizes_from_fasta(fastafile):
    """Get chromosome size information from a fasta file.

    Args:
        fastafile: Path to the FASTA file.

    Returns:
        A dict mapping each reference name to its length as an ``int``.
    """
    # The context manager closes the handle even if reading the index fails.
    with pysam.FastaFile(fastafile) as fasta:
        return {
            name: int(length)
            for name, length in zip(fasta.references, fasta.lengths)
        }
示例12: get_sequence
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def get_sequence(chunk, fastafile):
    """Obtain the upper-cased sequence for an interval.

    Args:
        chunk: Object exposing ``chrom``, ``start``, ``end`` and ``strand``
            for which the sequence is to be fetched.
        fastafile: Filename of the FASTA file holding the sequence.

    Returns:
        The fetched sequence in upper case, passed through
        ``reverse_complement`` first when ``chunk.strand`` is ``"-"``.
    """
    # The context manager closes the handle even when fetch() raises; the
    # strand handling does not need the file, so it happens afterwards.
    with pysam.FastaFile(fastafile) as handle:
        sequence = handle.fetch(chunk.chrom, chunk.start, chunk.end)
    if chunk.strand == "-":
        sequence = reverse_complement(sequence)
    return sequence.upper()
示例13: getNucFreqsFromChunkList
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def getNucFreqsFromChunkList(chunks, fasta, nucleotides):
    """Get nucleotide frequencies within regions of a genome.

    Args:
        chunks: Iterable of objects exposing ``chrom``, ``start`` and ``end``.
        fasta: Path to the FASTA file.
        nucleotides: Sequence of strings to count; each is counted in the
            upper-cased sequence of every chunk.

    Returns:
        A NumPy array with one entry per item of ``nucleotides``: its total
        count divided by the total number of fetched bases.
    """
    out = np.zeros(len(nucleotides))
    n = 0.0
    # The context manager closes the handle even when a fetch() fails.
    with pysam.FastaFile(fasta) as handle:
        for chunk in chunks:
            sequence = handle.fetch(chunk.chrom, chunk.start, chunk.end).upper()
            out += [sequence.count(nuc) for nuc in nucleotides]
            n += len(sequence)
    # NOTE(review): if nothing was fetched, n stays 0.0 and this is a 0/0
    # array division -- confirm callers never pass an empty chunk list.
    return out / n
示例14: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def __init__(self, filename):
    """Open ``filename`` as a FastaFile and keep the handle on ``self._fh``."""
    handle = FastaFile(filename)
    self._fh = handle
示例15: close
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import FastaFile [as 别名]
def close(self):
    """Finalise the output file; do nothing when no handle is open.

    With an open handle: close it, compress ``self._basepath`` with the
    configured bgzip executable, rename the result to ``self.filename``,
    open it once with FastaFile so the indexes are created, make the file
    and its ``.fai`` / ``.gzi`` companions read-only, and log a summary.
    """
    # NOTE(review): the whole finalisation is treated as guarded by the
    # open-handle check -- confirm against the upstream source.
    if not self._fh:
        return

    self._fh.close()
    self._fh = None
    subprocess.check_call([self._bgzip_exe, "--force", self._basepath])
    os.rename(self._basepath + ".gz", self.filename)

    # open file with FastaFile to create indexes, then make all read-only
    index_builder = FastaFile(self.filename)
    index_builder.close()
    read_only = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    for target in (self.filename, self.filename + ".fai", self.filename + ".gzi"):
        os.chmod(target, read_only)

    _logger.info("{} written; added {} sequences".format(self.filename, len(self._added)))