本文整理汇总了Python中tabix.open函数的典型用法代码示例。如果您正苦于以下问题:Python tabix.open函数的具体用法?Python tabix.open怎么用?Python tabix.open使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了tabix.open函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: init_resource
def init_resource(self):
    """Open the tabix-backed annotation resources listed in the configuration.

    Reads section ``self.rv`` of ``self.config``:

    * if the ``dbsnp`` option is present, its value is opened with
      ``tabix.open`` and stored in ``self.resources['dbsnp']``;
    * every option whose value ends with ``.featuredb`` is opened with
      ``tabix.open`` and appended to ``self.features`` as an
      ``(option_name, handle)`` tuple.

    ``self.features`` is always reset to a fresh list.
    """
    for rname in ['dbsnp']:
        if self.config.has_option(self.rv, rname):
            # Lazy import: tabix is only required when a resource is configured.
            import tabix
            self.resources[rname] = tabix.open(self.config.get(self.rv, rname))

    self.features = []
    for rname in self.config.options(self.rv):
        featdb = self.config.get(self.rv, rname)
        if featdb.endswith('.featuredb'):
            # Import again here: the import above only runs when 'dbsnp' is
            # configured, which previously left `tabix` unbound in this loop.
            import tabix
            self.features.append((rname, tabix.open(featdb)))
示例2: get_cadd
def get_cadd(config, chrom, start, ref, alt):
    """Return the CADD value stored for a variant, or ``np.nan`` if unavailable.

    Args:
        config: Nested mapping; the tabix file path is read from
            ``config['data_paths']['cadd']['whole_genome_cadd']``.
        chrom: Chromosome name used to build the query region.
        start: Position used as both start and end of the query region.
        ref: Reference allele, compared against column 2 of each record.
        alt: Alternate allele, compared against column 3 of each record.

    Returns:
        ``float(record[5])`` of the first record whose column 3 equals
        ``alt``. ``np.nan`` is returned (after logging a warning) when the
        file cannot be opened, the query fails, a record's reference allele
        differs from ``ref``, or no record matches ``alt``.
    """
    tabix_fp = config['data_paths']['cadd']['whole_genome_cadd']
    try:
        tb = tabix.open(tabix_fp)
    except Exception:
        # Best effort: a missing or unreadable CADD file is not fatal.
        logging.warning('{0} not available'.format(tabix_fp))
        return np.nan

    # Only single-position lookups are supported, so the region is start-start.
    region = '{0}:{1}-{1}'.format(chrom, start)
    try:
        records = tb.querys(region)
    except Exception:
        logging.warning('Error when trying to query {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
        return np.nan

    for record in records:
        if record[2] != ref:
            logging.warning('Reference {0} is not the one in CADD for entry {1}-{2}-{3}-{4}'.format(ref, chrom, start, ref, alt))
            return np.nan
        if record[3] == alt:
            return float(record[5])

    # Fixed format indices: the message previously printed alt-chrom-start-alt.
    logging.warning('I do not find a cadd entry for {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
    return np.nan
示例3: checkAndOpen
def checkAndOpen(db):
    """Validate a bgzip-compressed, tabix-indexed database and open it.

    Args:
        db: Path to the database file; ``~`` is expanded.

    Returns:
        The handle returned by ``tabix.open(db)``, or ``None`` when the
        ``tabix`` module has not been imported, the file does not exist,
        the file cannot be read, or the file is empty.

    ``abortWithMessage`` is called when the file is an uncompressed
    ``.vcf``, is ``.gz`` without a ``.tbi`` index next to it, or has any
    other extension.
    """
    if 'tabix' not in sys.modules:
        return None

    db = os.path.expanduser(db)
    if not os.path.exists(db):
        return None

    extension = os.path.splitext(db)[1]
    if extension == ".vcf":
        abortWithMessage("Error: database file {0} must compressed with bgzip".format(db))
        return None  # only reached if abortWithMessage returns
    if extension != ".gz":
        abortWithMessage("Error opening database files: {0}".format(db))
        return None
    if not os.path.exists(db + ".tbi"):
        abortWithMessage("Compressed database is not tabix indexed")
        return None

    # Read one line to confirm the file is readable and non-empty. The
    # context manager closes the gzip handle, which used to leak.
    try:
        with gzip.open(db) as database:
            first_line = database.readline()
    except IOError:
        print("WARNING: could not open {}".format(db))
        return None

    # readline() signals end-of-file with an empty result, never with
    # StopIteration, so the previous empty-file check could not trigger.
    if not first_line:
        print("Empty file {}".format(db))
        return None

    return tabix.open(db)
示例4: get_job_results
def get_job_results(job_id, job=None):
    """Stream a job's EPACTS output as tab-separated text, optionally filtered.

    The request's query-string arguments act as filters:

    * ``region`` -- restrict rows to a tabix region. NOTE(review): the value
      is passed straight to ``querys``, so it is assumed to be a tabix
      region string such as ``chrom:start-end``; confirm against callers.
    * ``non-monomorphic`` -- keep rows whose ``AC`` column is > 0.
    * ``max-pvalue`` -- keep rows whose ``PVALUE`` column is not ``"NA"``
      and is below the given threshold.

    Args:
        job_id: Unused in this function body.
        job: Object providing ``relative_path(name)``; used to locate
            ``output.epacts.gz``.

    Returns:
        A ``Response`` with mimetype ``text/plain`` that yields the header
        line followed by every row passing all active filters.

    Raises:
        Exception: if a requested filter needs a column absent from the header.
    """
    filters = request.args.to_dict()
    epacts_filename = job.relative_path("output.epacts.gz")

    with gzip.open(epacts_filename, "rt") as f:
        header = f.readline().rstrip('\n').split('\t')
    # Normalise the two column names that differ from the expected output.
    if len(header) > 1 and header[1] == "BEG":
        header[1] = "BEGIN"
    if header[0] == "#CHROM":
        header[0] = "CHROM"
    headerpos = {x: i for i, x in enumerate(header)}

    def read_all_rows():
        # Text mode so lines are str; the handle is closed when exhausted.
        with gzip.open(epacts_filename, "rt") as handle:
            next(handle, None)  # skip the header line
            for line in handle:
                yield line.rstrip("\n").split("\t")

    region = filters.get("region", "")
    if region:
        # The previous code referenced undefined chrom/start_pos/end_pos.
        # Region queries return data rows only, so no header is skipped.
        tb = tabix.open(epacts_filename)
        indata = tb.querys(region)
    else:
        indata = read_all_rows()

    pass_tests = []
    if filters.get("non-monomorphic", False):
        if "AC" not in headerpos:
            raise Exception("Column AC not found")
        ac_index = headerpos["AC"]

        def mono_pass(row):
            return float(row[ac_index]) > 0

        pass_tests.append(mono_pass)

    if "max-pvalue" in filters:
        if "PVALUE" not in headerpos:
            raise Exception("Column PVALUE not found")
        pval_index = headerpos["PVALUE"]
        thresh = float(filters.get("max-pvalue", 1))

        def pval_pass(row):
            if row[pval_index] == "NA":
                return False
            return float(row[pval_index]) < thresh

        pass_tests.append(pval_pass)

    def pass_row(row):
        return all(test(row) for test in pass_tests)

    def generate():
        yield "\t".join(header) + "\n"
        for row in indata:
            if pass_row(row):
                # Rows carry no trailing newline on either path; add it here.
                yield "\t".join(row) + "\n"

    return Response(generate(), mimetype="text/plain")
示例5: extract_CADD_score
def extract_CADD_score(arguments, q):
    """Annotate one VCF record with CADD values and queue the result.

    Args:
        arguments: Tuple ``(vcf_record, caddfile)``, where ``caddfile`` is
            the path passed to ``tabix.open``.
        q: Queue-like object; the annotated record is passed to ``q.put``.

    Returns:
        The annotated record as a list of VCF column strings (CHROM, POS,
        ID, REF, ALT, QUAL, FILTER, INFO). ``INFO['RAWCADD']`` and
        ``INFO['PHREDCADD']`` are 0 when no matching CADD record is found.
    """
    vcf_record, caddfile = arguments
    tb = tabix.open(caddfile)

    # Specific for CADD files: the "chr" prefix is stripped before querying.
    # FIXME: get info about chr or not from provided VCF file
    chromosome = vcf_record.CHROM.replace("chr", "")
    vcf_record.INFO["RAWCADD"] = 0
    vcf_record.INFO["PHREDCADD"] = 0

    records = tb.query(chromosome, vcf_record.POS - 1, vcf_record.POS)

    # Look for the matching mutation.
    # Works for SNVs, InDels optimisation is ongoing
    for rec in records:
        if rec[3] == vcf_record.ALT[0]:
            # FIXME: Make requested fields optional through arguments
            vcf_record.INFO["RAWCADD"] = rec[4]
            vcf_record.INFO["PHREDCADD"] = rec[5]
            break

    # str(None) is the truthy string 'None', so `str(QUAL) or '.'` never
    # produced the '.' placeholder for a missing QUAL.
    qual = '.' if vcf_record.QUAL is None else str(vcf_record.QUAL)

    # workaround since multiprocess can't handle VCF record class objects
    # FIXME: use VCF class records rather than this ugly string
    annotated = VCF_WRITER._map(
        str,
        [vcf_record.CHROM, vcf_record.POS, vcf_record.ID, vcf_record.REF],
    ) + [
        VCF_WRITER._format_alt(vcf_record.ALT),
        qual,
        VCF_WRITER._format_filter(vcf_record.FILTER),
        VCF_WRITER._format_info(vcf_record.INFO),
    ]

    # Return results to Queue
    q.put(annotated)
    return annotated
示例6: get_exons
def get_exons(chrom, start, stop, file):
    """Return every record of tabix file ``file`` in ``chrom:start-stop`` as a list."""
    handle = tabix.open(file)
    return list(handle.query(chrom, start, stop))
示例7: __init__
def __init__(self, _snp, _ref, _vcf, _restrict,
             _num_ctrls, _window, _match_context):
    """Store the target SNP and open the reference, VCF and restriction files.

    ``_ref`` is opened with ``pyfasta.Fasta``; ``_vcf`` and, when not
    ``None``, ``_restrict`` are opened with ``tabix.open``. When
    ``_match_context`` is non-negative, the SNP's context is computed once
    via ``self.GetContext`` and cached in ``self.snp_context``.
    """
    self.snp = _snp
    self.ref = pyfasta.Fasta(_ref)
    self.vcf = tabix.open(_vcf)
    self.restrict = tabix.open(_restrict) if _restrict is not None else None

    # Map the first whitespace-delimited token of each FASTA key to the full key.
    self.chromToKey = {
        fasta_key.split()[0]: fasta_key for fasta_key in self.ref.keys()
    }

    self.num_ctrls = _num_ctrls
    self.window = _window
    self.match_context = _match_context
    if self.match_context >= 0:
        self.snp_context = self.GetContext(self.snp)
示例8: main
def main(args):
    """Write per-position coverage values for the coordinates in a BED file.

    ``loadCoords(args.bedFile)`` supplies a chromosome and a collection of
    1-based positions. For each position the matching chromosome coverage
    file is queried, and the record's chromosome, position and ``c10``
    column (index 6) are written tab-separated to ``args.outFile``.
    The output file is created even when no chromosome is returned.
    """
    chrom, coords = loadCoords(args.bedFile)
    tb = None
    if chrom:
        # The handle must be kept: previously the result of tabix.open was
        # discarded and `tb` stayed an empty string, so tb.query failed.
        tb = tabix.open(
            "/home/evansj/me/data/ExAC/coverage/ftp.broadinstitute.org/pub/ExAC_release/current/coverage/Panel.chr%s.coverage.txt.gz"
            % (chrom,)
        )
    with open(args.outFile, "w") as fout:
        if chrom:
            for st in coords:
                # st is 1-idx in coords
                # tabix needs 0-based
                records = tb.query(chrom, st - 1, st)
                for record in records:
                    thisChrom, pos, mean, median, c1, c5, c10, c15 = record[0:8]
                    print("\t".join((thisChrom, pos, c10)), file=fout)
示例9: tabix_vcf
def tabix_vcf(vcf_file, in_chr, in_start, in_stop):
    """Yield the records of a tabix-indexed VCF that fall in a region.

    Args:
        vcf_file: Path passed to ``tabix.open``.
        in_chr: Chromosome; converted with ``str``.
        in_start: Region start; converted with ``int``.
        in_stop: Region end; converted with ``int``.

    Yields:
        Each record returned by the tabix query. If opening the file or
        querying fails, the generator simply ends without yielding.
    """
    chrom = str(in_chr)
    start = int(in_start)
    stop = int(in_stop)
    try:
        vcf_tb = tabix.open(vcf_file)
        for rec in vcf_tb.query(chrom, start, stop):
            yield rec
    except Exception:
        # Best effort: failures give an empty result. `Exception` rather
        # than a bare except, so KeyboardInterrupt/SystemExit still propagate.
        return
示例10: get_tabixhandle
def get_tabixhandle(path):
    """Open ``path`` with tabix after checking its name and index.

    Raises:
        TabixError: if ``path`` does not end with ``.gz``, or if no
            ``path + '.tbi'`` file exists.
    """
    if path.endswith('.gz'):
        if os.path.isfile(path + '.tbi'):
            return tabix.open(path)
        raise TabixError("No index could be found for {0}".format(path))
    raise TabixError("File {0} does not end with '.gz'".format(path))
示例11: __init__
def __init__(self, args):
    """Load transcript information and open the GTF and alignment files.

    Progress messages are written to stderr before each step.
    """
    self.args = args

    # Parse the transcript infos and index them by transcript id.
    print('Loading transcripts...', file=sys.stderr)
    self.tx_infos = self._parse_tx_infos(args.gencode_gtf)
    self.tx_info_by_id = {
        tx_info.transcript_id: tx_info for tx_info in self.tx_infos
    }

    # The same GTF path is also opened through tabix.
    print('Opening tabix file...', file=sys.stderr)
    self.tabix = tabix.open(args.gencode_gtf)

    # Open the alignment file in mode 'r'.
    print('Opening BAM file...', file=sys.stderr)
    self.sam_file = pysam.AlignmentFile(args.alignment_bam, 'r')
示例12: ld_expand
def ld_expand(df, ld_beds):
    """
    Expand a set of SNVs into all SNVs with LD >= 0.8 and return a BedTool of
    the expanded SNPs.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas dataframe with SNVs. The index is of the form chrom:pos where pos
        is the one-based position of the SNV. The columns are chrom, start, end.
        chrom, start, end make a zero-based bed file with the SNV coordinates.

    ld_beds : dict
        Dict whose keys are chromosomes and whose values are filenames of
        tabixed LD bed files. The LD bed files should be formatted like this:
            chr1    14463   14464   14464:51479:0.254183
        where the first three columns indicate the zero-based coordinates of
        a SNV and the fourth column has the one-based coordinate of that
        SNV, the one-based coordinate of another SNV on the same chromosome, and
        the LD between these SNVs (all separated by colons).

    Returns
    -------
    bt : pybedtools.BedTool
        Sorted BedTool with input SNVs and SNVs they are in LD with. The name
        column of every interval is the index label of the originating SNV.
    """
    import pybedtools as pbt
    import tabix

    out_snps = []
    for chrom, ld_bed in ld_beds.items():
        t = tabix.open(ld_bed)
        tdf = df[df['chrom'].astype(str) == chrom]
        for ind in tdf.index:
            # .loc replaces the removed DataFrame.ix; int() gives tabix a
            # plain Python integer rather than a numpy scalar.
            p = int(tdf.loc[ind, 'end'])
            out_snps.append('{}\t{}\t{}\t{}\n'.format(chrom, p - 1, p, ind))
            try:
                # Plain iteration replaces the Python-2-only r.next() loop.
                for n in t.query('{}'.format(chrom), p - 1, p):
                    p1, p2, r2 = n[-1].split(':')
                    if float(r2) >= 0.8:
                        out_snps.append('{}\t{}\t{}\t{}\n'.format(
                            n[0], int(p2) - 1, int(p2), ind))
            except tabix.TabixError:
                # A failed query leaves only the SNV itself in the output.
                continue
    bt = pbt.BedTool(''.join(out_snps), from_string=True)
    bt = bt.sort()
    return bt
示例13: get_genotypes
def get_genotypes(CpG_location):
    """Return the tabix records for a region as a DataFrame.

    Args:
        CpG_location: Region string passed to ``querys``.

    Returns:
        pandas.DataFrame with one row per record (row labels 0, 1, 2, ...)
        and columns labelled 0..781, holding each record's fields from
        index 3 onwards.
    """
    import tabix
    import pandas as pd

    tb_file = "/path/to/file/DF_meth_variants.gz"
    tb = tabix.open(tb_file)
    records = tb.querys(CpG_location)

    # Build the frame in one step instead of growing it row by row, and use
    # range() because xrange() does not exist on Python 3.
    rows = [record[3:] for record in records]
    return pd.DataFrame(rows, columns=range(0, 782))
示例14: test_same_aa_different_positions
def test_same_aa_different_positions(self):
    """same_aa() returns no pairs when the paired variants have different protein positions."""
    vcf_lines = make_vcf_header()
    variants = (
        (5, 'Protein_position=2'),
        (7, 'Protein_position=3'),
        (8, 'Protein_position=4'),
    )
    for position, annotation in variants:
        vcf_lines.append(make_vcf_line(pos=position, extra=annotation))
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)
    candidate_pairs = [[('1', 7), ('1', 8)]]
    self.assertEqual(same_aa(handle, candidate_pairs), [])
示例15: test_same_aa
def test_same_aa(self):
    """same_aa() keeps a pair whose two variants share the same protein position."""
    # Write a VCF with two variants annotated with the same protein position.
    vcf_lines = make_vcf_header()
    for position in (2, 4):
        vcf_lines.append(make_vcf_line(pos=position, extra='Protein_position=1'))
    self.write_vcf(vcf_lines)

    handle = tabix.open(self.path)
    candidate_pairs = [[('1', 2), ('1', 4)]]
    expected = [[('1', 2), ('1', 4)]]
    self.assertEqual(same_aa(handle, candidate_pairs), expected)