本文整理汇总了Python中pysam.TabixFile方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.TabixFile方法的具体用法?Python pysam.TabixFile怎么用?Python pysam.TabixFile使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pysam的用法示例。
在下文中一共展示了pysam.TabixFile方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def __init__(self, properties_dict):
    """Initialise the track and, if the operation needs one, a second file.

    The primary file is loaded via ``self.load_file()``. When the configured
    ``operation`` string mentions ``second_file``, that file is opened with
    tabix if its name ends in ``.bgz`` (falling back to an interval tree when
    opening raises ``IOError``) and is read as an interval tree otherwise.

    Raises:
        InputError: the operation mentions ``second_file`` but the
            ``second_file`` property is ``None``.
    """
    super(BedGraphTrack, self).__init__(properties_dict)
    self.load_file()
    self.tbx2 = None
    self.interval_tree2 = None

    operation = self.properties['operation']
    if 'second_file' not in operation:
        # Nothing else to prepare for single-file operations.
        return

    second_path = self.properties['second_file']
    if second_path is None:
        raise InputError("operation: {} requires to set the parameter"
                         " second_file."
                         "".format(operation))

    if not second_path.endswith(".bgz"):
        # Plain file: load it as an interval tree.
        self.interval_tree2, _, _ = file_to_intervaltree(second_path)
        return

    # From the tabix file it is not possible to know the global min and max.
    try:
        self.tbx2 = pysam.TabixFile(second_path)
    except IOError:
        # Tabix could not open the file; fall back to an interval tree.
        self.interval_tree2, _, _ = file_to_intervaltree(second_path)
示例2: findNFRs
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def findNFRs(self):
    """Find NFR regions between consecutive positions in the calls file.

    Positions are read from column index 1 of the tabix-indexed file
    ``self.params.calls`` for ``self.chrom:self.start-self.end``. For every
    pair of consecutive positions, a candidate ``NFR`` spanning
    ``previous + 73`` to ``next - 72`` is built; empty or inverted spans are
    skipped. Candidates passing both occupancy thresholds are appended to
    ``self.nfrs``.

    The tabix handle is closed before returning, and no Python-2-only
    builtins are used.
    """
    # NOTE(review): 73/72 presumably trim half of a ~147 bp nucleosome
    # footprint on each side -- confirm against the caller's conventions.
    with pysam.TabixFile(self.params.calls) as tbx:
        if self.chrom in tbx.contigs:
            nucs = [
                int(row[1])
                for row in tbx.fetch(self.chrom, self.start, self.end,
                                     parser=pysam.asTuple())
            ]
        else:
            # Contig absent from the index: nothing to pair up.
            nucs = []

    for upstream, downstream in zip(nucs, nucs[1:]):
        left = upstream + 73
        right = downstream - 72
        if right <= left:
            continue
        candidate = NFR(left, right, self)
        if (candidate.min_upper < self.params.max_occ_upper
                and candidate.occ < self.params.max_occ):
            self.nfrs.append(candidate)
示例3: _get_read_depths
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def _get_read_depths(cls, read_depths_file, sequence_name, position):
    '''Return the bases and read counts recorded at one position.

    Looks up ``sequence_name:position`` in the tabix-indexed
    ``read_depths_file`` (its ``.tbi`` index must exist next to it).

    Returns:
        ``(bases, ref_counts, alt_counts)`` where ``bases`` is the reference
        base, or ``"ref,alt"`` when an alternative is present;
        ``ref_counts`` is an int; ``alt_counts`` is the unparsed string from
        the file. ``None`` is returned when the lookup fails or no single
        row can be selected.
    '''
    assert os.path.exists(read_depths_file)
    assert os.path.exists(read_depths_file + '.tbi')

    # Close the handle deterministically instead of relying on GC.
    with pysam.TabixFile(read_depths_file) as tbx:
        try:
            rows = list(tbx.fetch(sequence_name, position, position + 1))
        except Exception:
            # Best-effort lookup: any fetch failure means "no data here".
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None

    if len(rows) > 1:
        # Happens with indels: multiple lines for the same reference base.
        # Prefer the single row carrying an alternative; else the last row.
        test_rows = [x for x in rows if x.rstrip().split()[3] != '.']
        rows = test_rows if len(test_rows) == 1 else [rows[-1]]

    if len(rows) != 1:
        return None

    _, _, ref_base, alt_base, ref_counts, alt_counts = rows[0].rstrip().split()
    bases = ref_base if alt_base == '.' else ref_base + ',' + alt_base
    return bases, int(ref_counts), alt_counts
示例4: fetch_from_tabix
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def fetch_from_tabix(path, chrom, start, end):
    """Yield one parsed record per tabix line overlapping a region.

    ``chrom`` is first passed through ``match_chrom_format`` together with
    the contig names found in the file. Each fetched line is split on
    whitespace and handed to ``tx_from_bedfields``.

    The tabix file is opened as a context manager, so it is closed once the
    generator is exhausted or closed rather than staying open until garbage
    collection.
    """
    import pysam

    with pysam.TabixFile(path) as bed:
        chrom = match_chrom_format(chrom, bed.contigs)
        for locus in bed.fetch(chrom, start, end):
            yield tx_from_bedfields(locus.split())
示例5: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def __init__(self, path):
    """Pick a reader for ``path``: tabix-backed if possible, else plain.

    Files ending in ``.gz`` are tentatively opened with
    ``pysam.TabixFile``; if that fails for any ordinary reason, the
    non-indexed reader is used instead.
    """
    tabix = None
    if path.endswith(".gz"):
        # Might be a tabix file.
        try:
            tabix = pysam.TabixFile(path)
        except Exception:
            # Deliberate best-effort probe. ``Exception`` (not a bare
            # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
            tabix = None

    self.reader = _Tabix(tabix) if tabix else _NonIndexed(path)
示例6: load_file
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def load_file(self):
    """Load ``self.properties['file']`` as a tabix file or interval tree.

    ``self.tbx`` is set when the file name ends in ``.bgz`` and tabix can
    open it; in every other case ``self.interval_tree`` is populated from
    ``file_to_intervaltree``. ``self.num_fields`` is reset to ``None``.
    """
    self.tbx = None
    track_path = self.properties['file']

    if not track_path.endswith(".bgz"):
        # Not bgzipped: load the file as an interval tree.
        self.interval_tree, _, _ = file_to_intervaltree(track_path)
    else:
        # Try to load a tabix file if available. From the tabix file it is
        # not possible to know the global min and max.
        try:
            self.tbx = pysam.TabixFile(track_path)
        except IOError:
            self.interval_tree, _, _ = file_to_intervaltree(track_path)

    self.num_fields = None
示例7: clinvar_tabix
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def clinvar_tabix(test_root):
    """Open the chromosome-22 ClinVar example annotation as a TabixFile.

    ``test_root`` must provide ``joinpath``; the resulting path is converted
    to ``str`` before being handed to ``TabixFile``.
    """
    clinvar_path = test_root.joinpath(
        "examples/annotations/clinvar_chrom_22_only.b37.tsv.gz"
    )
    return TabixFile(str(clinvar_path), encoding="utf8")
示例8: load_fragments
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def load_fragments(options, sample, dataset, chrom=None, start=None, end=None, usecols=None,
                   min_reads_per_frag=1):
    """Load read-cloud fragments for a sample/dataset as a DataFrame.

    Rows come from the tabix-indexed file
    ``<results_dir>/CombineReadcloudsStep/readclouds.<sample>.<dataset>.tsv.gz``,
    optionally restricted to ``chrom:start-end``.

    Args:
        options: object exposing ``results_dir``.
        sample: object exposing ``name``.
        dataset: object exposing ``id``.
        chrom, start, end: optional region; ``None`` leaves that bound open.
        usecols: optional list of column names to keep. ``num_reads`` is
            always included; the caller's list is not modified.
        min_reads_per_frag: when positive, only rows with ``num_reads``
            strictly greater than this value are kept.

    Returns:
        A DataFrame of fragments; an empty frame with the standard columns
        when ``chrom`` is not present in the index.

    Raises:
        Exception: if ``start`` is negative or ``start >= end``.
    """
    if start is not None and start < 0:
        raise Exception("start coord is negative: {}:{}-{}".format(chrom, start, end))
    # Only compare when both bounds are known; ``None >= end`` is a
    # TypeError on Python 3.
    if end is not None and start is not None and start >= end:
        raise Exception("end coord is before start: {}:{}-{}".format(chrom, start, end))

    readclouds_path = os.path.join(
        options.results_dir,
        "CombineReadcloudsStep",
        "readclouds.{}.{}.tsv.gz".format(sample.name, dataset.id))

    # The context manager closes the handle on every exit path.
    with pysam.TabixFile(readclouds_path) as tabix:
        if chrom is not None and chrom not in tabix.contigs:
            return pandas.DataFrame(columns="chrom start_pos end_pos bc num_reads obs_len hap".split())
        s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))

    if usecols is not None and "num_reads" not in usecols:
        # Build a new list so the caller's argument is left untouched.
        usecols = list(usecols) + ["num_reads"]

    readclouds = pandas.read_table(s, header=None, names=Readcloud._fields, usecols=usecols)
    readclouds["chrom"] = readclouds["chrom"].astype("string")

    if min_reads_per_frag > 0:
        # NOTE(review): the comparison is strict (>), so fragments with
        # exactly ``min_reads_per_frag`` reads are dropped -- confirm intent.
        readclouds = readclouds.loc[readclouds["num_reads"]>min_reads_per_frag]

    return readclouds
示例9: IndexedVariantFileReader
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def IndexedVariantFileReader(phenocode):
    """Yield an indexed reader over the gzipped variant file of a phenotype.

    The header line is read first to map each column name to its index;
    every column must be a known per-variant or per-association field. The
    same file is then opened through tabix and wrapped in ``_ivfr``.

    NOTE(review): this is a generator yielding exactly once, presumably
    wrapped by a context-manager decorator outside this view -- confirm.
    """
    filepath = common_filepaths['pheno_gz'](phenocode)

    with read_gzip(filepath) as gz_handle:
        header = next(csv.reader(gz_handle, dialect='pheweb-internal-dialect'))

    # A previous version of PheWeb commented the header line; drop the '#'.
    first_name = header[0]
    if first_name.startswith('#'):
        header[0] = first_name[1:]

    for name in header:
        assert (name in conf.parse.per_variant_fields
                or name in conf.parse.per_assoc_fields), name

    colidxs = dict((name, position) for position, name in enumerate(header))

    with pysam.TabixFile(filepath, parser=None) as tabix_file:
        yield _ivfr(tabix_file, colidxs)
示例10: context
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def context(self):
    """Yield a ``_mr`` reader bound to an open tabix handle.

    The tabix file at ``self._filepath`` stays open while the consumer holds
    the yielded object and is closed when the generator finishes.
    """
    with pysam.TabixFile(self._filepath, parser=None) as tbx_handle:
        matrix_reader = _mr(
            tbx_handle,
            self._colidxs,
            self._colidxs_for_pheno,
            self._info_for_pheno,
        )
        yield matrix_reader
示例11: get_tabix_iter
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def get_tabix_iter(chrm, start, end, datafile):
    """Return a tabix iterator over a padded region of a BED/GFF/GFF3 file.

    The region is widened by 1000 positions on each side (clamped at 0). If
    the first fetch raises ``ValueError``, the ``chr`` prefix of ``chrm`` is
    toggled (added or removed) and the fetch is retried, to avoid chrX vs. X
    notation issues in annotation files.

    Returns:
        The iterator from ``TabixFile.fetch``, or ``None`` when both
        attempts fail. In that case a warning is written to stderr and the
        exception is printed.
    """
    tbx = pysam.TabixFile(datafile)
    window_start = max(0, start - 1000)
    window_end = end + 1000

    try:
        return tbx.fetch(chrm, window_start, window_end)
    except ValueError:
        # Try to account for a chr / no-chr prefix mismatch.
        chrm = chrm[3:] if chrm[:3] == "chr" else "chr" + chrm
        try:
            return tbx.fetch(chrm, window_start, window_end)
        except ValueError as e:
            warning = (
                "Warning: Could not fetch "
                + chrm
                + ":"
                + str(start)
                + "-"
                + str(end)
                + " from "
                + datafile
            )
            print(warning, file=sys.stderr)
            print(e)
    return None
# }}}
##Coverage methods
# {{{def add_coverage(bam_file, read, coverage, separate_mqual):
示例12: validate
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def validate(self):
    """Assert that the BAM file referenced by ``self.bam`` exists on disk.

    Raises:
        AssertionError: the path is missing; the message names the bam
            path, the sample name and the dataset id.
    """
    bam_present = os.path.exists(self.bam)
    assert bam_present, (
        "missing bam file '{}' for sample '{}' and dataset '{}'".format(
            self.bam, self.sample.name, self.id)
    )
# @staticmethod
# def from_longranger_dir(self, longranger_dir):
# fragments = os.path.join(longranger_dir,
# "PHASER_SVCALLER_CS/PHASER_SVCALLER/_REPORTER/"
# "REPORT_SINGLE_PARTITION/fork0/files/fragments.h5")
# bam = os.path.join(longranger_dir,
# "PHASER_SVCALLER_CS/PHASER_SVCALLER/ATTACH_PHASING/"
# "fork0/files/phased_possorted_bam.bam")
# phased_fragments = os.path.join(longranger_dir,
# "10XSARCOMAC1/PHASER_SVCALLER_CS/PHASER_SVCALLER/"
# "_SNPINDEL_PHASER/PHASE_SNPINDELS/fork0/files/"
# "fragment_phasing.tsv.gz")
# self.validate()
# return TenXDataset(bam, fragments, phased_fragments)
# def load_phased_fragments(self, chrom=None, start=None, end=None):
# columns = ["chrom", "start_pos", "end_pos", "phase_set", "ps_start",
# "ps_end", "bc", "h0", "h1", "hmix", "unkn"]
# try:
# tabix = pysam.TabixFile(self.phased_fragments)
# s = StringIO.StringIO("\n".join(tabix.fetch(chrom, start, end)))
# frags = pandas.read_table(s)
# frags.columns = columns
# except (IOError, ValueError):
# frags = pandas.DataFrame(columns=columns)
# return frags
# def load_fragments(self, chrom=None, start=None, end=None):
# tabix = pysam.TabixFile()
# try:
# fragments = utilities.read_data_frame(self.fragments)
# goodbcs = utilities.get_good_barcodes(fragments)
# fragments = fragments.loc[fragments["bc"].isin(goodbcs)]
# # fragments = fragments.loc[fragments["num_reads"]>5]
# if chrom is not None:
# fragments = fragments.loc[fragments["chrom"]==chrom]
# return fragments
# except:
# logging.exception("Unable to load fragments from fragments file "
# "'{}'".format(self.fragments))
# raise
示例13: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def __init__(
    self,
    filepath,
    chromsizes,
    bins,
    map=map,
    n_chunks=1,
    is_one_based=False,
    **kwargs
):
    """Prepare aggregation of a tabix-indexed contact list.

    Args:
        filepath: path of the tabix-indexed contact list file.
        chromsizes: passed to ``GenomeSegmentation`` with ``bins``.
        bins: passed to ``GenomeSegmentation`` with ``chromsizes``.
        map: map-like callable stored on the instance.
        n_chunks: stored as ``self.n_chunks``.
        is_one_based: coerced to ``bool`` and stored.
        **kwargs: ``C2`` (default 3) and ``P2`` (default 4) are consumed.
            NOTE(review): presumably column indices for the second
            chromosome and position -- confirm.

    Raises:
        ImportError: pysam is not installed.
        RuntimeError: the file's index lists no contigs.
    """
    try:
        import pysam
    except ImportError:
        raise ImportError("pysam is required to read tabix files")

    import dill
    import pickle

    dill.settings["protocol"] = pickle.HIGHEST_PROTOCOL

    self._map = map
    self.n_chunks = n_chunks
    self.is_one_based = bool(is_one_based)
    self.C2 = kwargs.pop("C2", 3)
    self.P2 = kwargs.pop("P2", 4)

    # All requested contigs will be placed in the output matrix.
    self.gs = GenomeSegmentation(chromsizes, bins)

    # Find available contigs in the contact list.
    self.filepath = filepath
    self.n_records = None
    with pysam.TabixFile(filepath, "r", encoding="ascii") as tabix_handle:
        try:
            # Contig names may arrive as bytes; decode them when they do.
            self.file_contigs = [
                name.decode("ascii") for name in tabix_handle.contigs
            ]
        except AttributeError:
            self.file_contigs = tabix_handle.contigs
        if len(self.file_contigs) == 0:
            raise RuntimeError("No reference sequences found.")

    # Warn about requested contigs not seen in the contact list.
    for chrom in self.gs.contigs:
        if chrom in self.file_contigs:
            continue
        warnings.warn(
            "Did not find contig " + " '{}' in contact list file.".format(chrom)
        )

    warnings.warn(
        "NOTE: When using the Tabix aggregator, make sure the order of "
        "chromosomes in the provided chromsizes agrees with the chromosome "
        "ordering of read ends in the contact list file."
    )
示例14: get_reads
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import TabixFile [as 别名]
def get_reads(self, cluster_name, out1, out2=None, fasta=False, log_fh=None, wanted_ids=None):
    """Write the reads stored for one cluster to FASTA/FASTQ file(s).

    Records are fetched from the tabix-indexed ``self.outfile`` using
    ``cluster_name`` as the reference. Each line holds
    ``cluster number seq qual``. Odd numbers are written as ``<number>/1``
    to ``out1``; even numbers are written as ``<number - 1>/2`` to ``out2``
    (or to ``out1`` when ``out2`` is None).

    Args:
        cluster_name: reference name to fetch from the tabix file.
        out1: output path for ``/1`` reads (and ``/2`` if ``out2`` is None).
        out2: optional output path for ``/2`` reads.
        fasta: write FASTA records instead of FASTQ.
        log_fh: optional file handle for progress messages.
        wanted_ids: optional container of odd read numbers; a read is kept
            only if its pair's odd number is in it.

    Returns:
        ``(total_reads, total_bases)``, where bases are counted as the
        length of each written read's quality string.

    The tabix handle and the output files are closed even if an error
    occurs while reading or writing.
    """
    total_reads = 0
    total_bases = 0

    if log_fh is not None:
        print('Getting reads for', cluster_name, 'from', self.outfile, file=log_fh)

    with pysam.TabixFile(self.outfile) as tabix_file:
        f_out1 = pyfastaq.utils.open_file_write(out1)
        f_out2 = None
        try:
            f_out2 = f_out1 if out2 is None else pyfastaq.utils.open_file_write(out2)

            for line in tabix_file.fetch(reference=cluster_name):
                cluster, number, seq, qual = line.rstrip().split()
                number = int(number)

                if wanted_ids is not None:
                    # Pairs are identified by the odd number of the pair.
                    pair_number = number if number % 2 else number - 1
                    if pair_number not in wanted_ids:
                        continue

                if number % 2 == 0:
                    read_name = str(number - 1) + '/2'
                    handle = f_out2
                else:
                    read_name = str(number) + '/1'
                    handle = f_out1

                if fasta:
                    print('>' + read_name, seq, sep='\n', file=handle)
                else:
                    print('@' + read_name, seq, '+', qual, sep='\n', file=handle)

                total_reads += 1
                total_bases += len(qual)
        finally:
            pyfastaq.utils.close(f_out1)
            # Only close f_out2 when it is a distinct, opened handle.
            if out2 is not None and f_out2 is not None:
                pyfastaq.utils.close(f_out2)

    if log_fh is not None:
        print('Finished getting reads for', cluster_name, 'from', self.outfile, file=log_fh)

    return total_reads, total_bases