Python tabix.open函数代码示例

本文整理汇总了Python中tabix.open函数的典型用法代码示例。如果您正苦于以下问题：Python tabix.open函数的具体用法？Python tabix.open怎么用？Python tabix.open使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了tabix.open函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: init_resource

    def init_resource(self):
        """Open the tabix-backed annotation resources named in the config.

        Reads section ``self.rv`` of ``self.config``:

        * if it has a ``dbsnp`` option, that file is opened with
          ``tabix.open`` and stored as ``self.resources['dbsnp']``;
        * every option whose value ends with ``.featuredb`` is opened and
          appended to ``self.features`` as an ``(option_name, handle)`` pair.

        ``self.features`` is always reset to a fresh list.
        """
        # tabix is imported lazily in each branch that needs it, so the
        # dependency stays optional when nothing is configured, and the
        # feature loop no longer relies on the dbsnp branch having run.
        if self.config.has_option(self.rv, 'dbsnp'):
            import tabix
            self.resources['dbsnp'] = tabix.open(self.config.get(self.rv, 'dbsnp'))

        self.features = []
        for rname in self.config.options(self.rv):
            featdb = self.config.get(self.rv, rname)
            if featdb.endswith('.featuredb'):
                # Re-importing is a cheap sys.modules lookup after the first time.
                import tabix
                self.features.append((rname, tabix.open(featdb)))

开发者ID:zwdzwd，项目名称:transvar，代码行数:12，代码来源:annodb.py

示例2: get_cadd

def get_cadd(config, chrom, start, ref, alt):
    """Look up the CADD value of a single-position variant.

    Parameters
    ----------
    config : mapping
        Must provide ``config['data_paths']['cadd']['whole_genome_cadd']``,
        the path of the tabix-indexed CADD file.
    chrom, start
        Chromosome and position; the file is queried for the region
        ``chrom:start-start``.
    ref, alt
        Compared against columns 2 and 3 of each returned record.

    Returns
    -------
    float
        ``float(record[5])`` of the first record whose column 3 equals
        ``alt``.  ``np.nan`` (with a logged warning) when the file cannot
        be opened, the query fails, a record's column 2 differs from
        ``ref``, or no record matches ``alt``.
    """
    tabix_fp = config['data_paths']['cadd']['whole_genome_cadd']
    try:
        tb = tabix.open(tabix_fp)
    except Exception:
        # Best effort: a missing or unreadable CADD file is not fatal.
        logging.warning('{0} not available'.format(tabix_fp))
        return np.nan

    region = '{0}:{1}-{1}'.format(chrom, start)
    try:
        records = tb.querys(region)
    except Exception:
        logging.warning('Error when trying to query {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
        return np.nan

    for record in records:
        if record[2] != ref:
            logging.warning('Reference {0} is not the one in CADD for entry {1}-{2}-{3}-{4}'.format(ref, chrom, start, ref, alt))
            return np.nan
        if record[3] == alt:
            return float(record[5])

    # Report the variant as chrom-start-ref-alt, like the other messages.
    logging.warning('I do not find a cadd entry for {0}-{1}-{2}-{3}'.format(chrom, start, ref, alt))
    return np.nan

开发者ID:DavidTamborero，项目名称:MTBR，代码行数:30，代码来源:ct_lib.py

示例3: checkAndOpen

def checkAndOpen(db):
    """Validate a bgzip-compressed, tabix-indexed database and open it.

    Parameters
    ----------
    db : str
        Path to the database file; ``~`` is expanded.

    Returns
    -------
    A ``tabix.open`` handle, or ``None`` when the ``tabix`` module has not
    been imported, the path does not exist, the file cannot be read as
    gzip, or the file is empty.

    ``abortWithMessage`` is called for an uncompressed ``.vcf``, for a
    ``.gz`` without a ``.tbi`` index, and for any other extension.
    NOTE(review): abortWithMessage presumably terminates the program; the
    ``return None`` after each call only matters if it does not.
    """
    if 'tabix' not in sys.modules:
        return None

    db = os.path.expanduser(db)
    if not os.path.exists(db):
        return None

    extension = os.path.splitext(db)[1]
    has_index = os.path.exists(db + ".tbi")
    if extension == ".gz" and has_index:
        try:
            # Read one line to prove the file is readable gzip; the context
            # manager closes the handle again straight away.
            with gzip.open(db) as database:
                first_line = database.readline()
        except IOError:
            print("WARNING: could not open {}".format(db))
            return None
    elif extension == ".vcf":
        abortWithMessage("Error: database file {0} must compressed with bgzip".format(db))
        return None
    elif extension == ".gz":
        abortWithMessage("Compressed database is not tabix indexed")
        return None
    else:
        abortWithMessage("Error opening database files: {0}".format(db))
        return None

    # readline() signals end-of-file with an empty value, not StopIteration.
    if not first_line:
        print("Empty file {}".format(db))
        return None

    return tabix.open(db)

开发者ID:blachlylab，项目名称:mucor，代码行数:30，代码来源:databases.py

示例4: get_job_results

def get_job_results(job_id, job=None):
    """Stream a job's EPACTS output as tab-separated text, optionally filtered.

    Filters are read from the request's query string:

    * ``region`` -- when non-empty, only rows returned by a tabix query for
      that region string are considered;
    * ``non-monomorphic`` -- when set, keep only rows whose ``AC`` column is
      greater than 0;
    * ``max-pvalue`` -- keep only rows whose ``PVALUE`` column is not ``NA``
      and is strictly below the given threshold.

    Parameters
    ----------
    job_id
        Not used inside this function.
    job
        Object providing ``relative_path(name)``.
        NOTE(review): the ``None`` default cannot work here -- presumably a
        decorator injects the job; confirm.

    Returns
    -------
    Response
        ``text/plain`` response streaming the header line followed by every
        row that passes all active filters, one row per line.

    Raises
    ------
    Exception
        If a filter needs the ``AC`` or ``PVALUE`` column and the header
        does not contain it.
    """
    filters = request.args.to_dict()
    epacts_filename = job.relative_path("output.epacts.gz")
    with gzip.open(epacts_filename, "rt") as f:
        header = f.readline().rstrip('\n').split('\t')
    # Normalise the two column names that are rewritten in the output.
    if len(header) > 1 and header[1] == "BEG":
        header[1] = "BEGIN"
    if header[0] == "#CHROM":
        header[0] = "CHROM"
    headerpos = {name: i for i, name in enumerate(header)}

    def read_all_rows():
        # Text mode so fields are str; the handle is closed once the stream
        # is exhausted or the generator is discarded.
        with gzip.open(epacts_filename, "rt") as rows:
            next(rows, None)  # skip the header line
            for line in rows:
                yield line.rstrip("\n").split("\t")

    region = filters.get("region", "")
    if region:
        # assumes the region filter uses tabix "chrom:start-end" syntax --
        # TODO confirm against the client. The query yields data records
        # only, so there is no header row to skip on this path.
        indata = tabix.open(epacts_filename).querys(region)
    else:
        indata = read_all_rows()

    pass_tests = []
    if filters.get("non-monomorphic", False):
        if "AC" not in headerpos:
            raise Exception("Column AC not found")
        ac_index = headerpos["AC"]

        def mono_pass(row):
            return float(row[ac_index]) > 0
        pass_tests.append(mono_pass)

    if "max-pvalue" in filters:
        if "PVALUE" not in headerpos:
            raise Exception("Column PVALUE not found")
        pval_index = headerpos["PVALUE"]
        thresh = float(filters.get("max-pvalue", 1))

        def pval_pass(row):
            if row[pval_index] == "NA":
                return False
            return float(row[pval_index]) < thresh
        pass_tests.append(pval_pass)

    def generate():
        yield "\t".join(header) + "\n"
        for row in indata:
            if all(check(row) for check in pass_tests):
                yield "\t".join(row) + "\n"

    return Response(generate(), mimetype="text/plain")

开发者ID:statgen，项目名称:gasp，代码行数:60，代码来源:api_blueprint.py

示例5: extract_CADD_score

def extract_CADD_score(arguments, q):
	"""Annotate one VCF record with CADD values and emit it as VCF fields.

	Parameters
	----------
	arguments : tuple
		``(vcf_record, caddfile)``: the record to annotate and the path of
		the tabix-indexed CADD file.
	q
		Queue-like object; the formatted fields are handed to ``q.put``.

	Returns
	-------
	list
		The formatted fields CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
		(the same list that was put on ``q``).

	``INFO['RAWCADD']`` and ``INFO['PHREDCADD']`` default to 0 and are set
	to columns 4 and 5 of the first CADD record whose column 3 equals the
	record's first ALT allele.
	"""
	vcf_record, caddfile = arguments

	tb = tabix.open(caddfile)

	# The query uses the chromosome name without a "chr" prefix.
	# FIXME: get info about chr or not from provided VCF file
	chromosome = (vcf_record.CHROM).replace("chr","")
	vcf_record.INFO["RAWCADD"]   = 0
	vcf_record.INFO["PHREDCADD"] = 0

	records = tb.query(chromosome, vcf_record.POS-1, vcf_record.POS)

	# Look for matching mutation
	# Works for SNVs, InDels optimisation is ongoing
	for rec in records:
		if rec[3] == vcf_record.ALT[0]:
			# FIXME: Make requested fields optional through arguments
			vcf_record.INFO["RAWCADD"]   = rec[4]
			vcf_record.INFO["PHREDCADD"] = rec[5]
			break

	# str(None) is the truthy string 'None', so a missing QUAL must be
	# tested explicitly to get the '.' placeholder.
	qual = '.' if vcf_record.QUAL is None else str(vcf_record.QUAL)

	# workaround since multiprocess can't handle VCF record class objects
	# FIXME: use VCF class records rather than this ugly string
	annotated = VCF_WRITER._map(str, [vcf_record.CHROM, vcf_record.POS, vcf_record.ID, vcf_record.REF]) + [VCF_WRITER._format_alt(vcf_record.ALT), qual, VCF_WRITER._format_filter(vcf_record.FILTER), VCF_WRITER._format_info(vcf_record.INFO)]

	# Return results to Queue
	q.put(annotated)
	return annotated

开发者ID:jdeligt，项目名称:Genetics，代码行数:29，代码来源:Annotate_CADD_Scores_In_VCF.py

示例6: get_exons

def get_exons(chrom, start, stop, file):
	"""Return, as a list, every record a tabix query yields for the region.

	``file`` is opened with ``tabix.open`` and queried with
	``(chrom, start, stop)``; the records are collected in query order.
	"""
	handle = tabix.open(file)
	return list(handle.query(chrom, start, stop))

开发者ID:niab，项目名称:exac-scripts，代码行数:7，代码来源:variants_reader.py

示例7: __init__

 def __init__(self, _snp, _ref, _vcf, _restrict,
              _num_ctrls, _window, _match_context):
     """Store the scoring parameters and open the reference/VCF resources.

     ``_ref`` is loaded with ``pyfasta.Fasta``; ``_vcf`` and, when it is
     not None, ``_restrict`` are opened with ``tabix.open``.  When
     ``_match_context`` is >= 0 the SNP's context is computed up front via
     ``self.GetContext``.
     """
     self.snp = _snp
     self.num_ctrls = _num_ctrls
     self.window = _window
     self.match_context = _match_context

     self.ref = pyfasta.Fasta(_ref)
     self.vcf = tabix.open(_vcf)
     self.restrict = tabix.open(_restrict) if _restrict is not None else None

     # Map the first whitespace-separated token of each FASTA key to the
     # full key.
     self.chromToKey = {key.split()[0]: key for key in self.ref.keys()}

     if self.match_context >= 0:
         self.snp_context = self.GetContext(self.snp)

开发者ID:mgymrek，项目名称:non-coding-annotations，代码行数:17，代码来源:annotation_score.py

示例8: main

def main(args):
    """Write ExAC panel coverage for every coordinate of a BED file.

    ``loadCoords(args.bedFile)`` supplies a chromosome and its positions.
    For each position the per-chromosome coverage file is queried and
    ``chrom``, ``pos`` and the ``c10`` column (column 6 of the record) are
    written tab-separated to ``args.outFile``.  The output file is created
    even when no chromosome was loaded.
    """
    chrom, coords = loadCoords(args.bedFile)
    tb = None
    if chrom:
        # Keep the handle: the queries below need it.
        tb = tabix.open(
            "/home/evansj/me/data/ExAC/coverage/ftp.broadinstitute.org/pub/ExAC_release/current/coverage/Panel.chr%s.coverage.txt.gz"
            % (chrom,)
        )

    with open(args.outFile, "w") as fout:
        if tb is not None:
            for st in coords:
                # st is 1-idx in coords
                # tabix needs 0-based
                records = tb.query(chrom, st - 1, st)
                for record in records:
                    thisChrom, pos, _mean, _median, _c1, _c5, c10, _c15 = record[0:8]
                    print("\t".join((thisChrom, pos, c10)), file=fout)

开发者ID:samesense，项目名称:target_exac_setup，代码行数:18，代码来源:pullGeneCov.py

示例9: tabix_vcf

def tabix_vcf(vcf_file, in_chr, in_start, in_stop):
	"""Yield the records of a tabix-indexed VCF for a given location.

	Parameters
	----------
	vcf_file
		Path handed to ``tabix.open``.
	in_chr
		Chromosome; converted with ``str``.
	in_start, in_stop
		Region bounds; converted with ``int`` (a non-numeric value raises
		``ValueError`` on the first iteration).

	Yields
	------
	The records returned by the tabix query.  If opening or querying the
	file fails, the generator simply ends without yielding anything more.
	"""
	chrom = str(in_chr)
	start = int(in_start)
	stop = int(in_stop)
	try:
		vcf_tb = tabix.open(vcf_file)
		for rec in vcf_tb.query(chrom, start, stop):
			yield rec
	except Exception:
		# Deliberately best effort, but KeyboardInterrupt and SystemExit
		# are no longer swallowed.
		return

开发者ID:Jana-A，项目名称:WTSI.DDD-VET，代码行数:9，代码来源:parsing_setups.py

示例10: get_tabixhandle

def get_tabixhandle(path):
    """Return a tabix handle for ``path`` after checking its name and index.

    Raises:
        TabixError: if ``path`` does not end with ``.gz``, or if
            ``path + '.tbi'`` is not an existing file.
    """
    if path.endswith('.gz'):
        if os.path.isfile(path + '.tbi'):
            return tabix.open(path)
        raise TabixError("No index could be found for {0}".format(path))
    raise TabixError("File {0} does not end with '.gz'".format(path))

开发者ID:moonso，项目名称:genmod，代码行数:11，代码来源:read_tabix_files.py

示例11: __init__

 def __init__(self, args):
     """Load transcripts and open the tabix and alignment files from ``args``.

     Progress messages are written to stderr before each step.
     """
     self.args = args

     # Parse the transcript infos and index them by transcript id.
     print('Loading transcripts...', file=sys.stderr)
     self.tx_infos = self._parse_tx_infos(args.gencode_gtf)
     self.tx_info_by_id = {tx.transcript_id: tx for tx in self.tx_infos}

     # The same GTF path is also opened through tabix.
     print('Opening tabix file...', file=sys.stderr)
     self.tabix = tabix.open(args.gencode_gtf)

     # Alignment file handle.
     print('Opening BAM file...', file=sys.stderr)
     self.sam_file = pysam.AlignmentFile(args.alignment_bam, 'r')

开发者ID:holtgrewe，项目名称:linc_splice，代码行数:12，代码来源:map_linc.py

示例12: ld_expand

def ld_expand(df, ld_beds, min_ld=0.8):
    """
    Expand a set of SNVs into all SNVs with LD >= min_ld and return a BedTool
    of the expanded SNVs.

    Parameters
    ----------
    df : pandas.DataFrame
        Pandas dataframe with SNVs. The index is of the form chrom:pos where pos
        is the one-based position of the SNV. The columns are chrom, start, end.
        chrom, start, end make a zero-based bed file with the SNV coordinates.

    ld_beds : dict
        Dict whose keys are chromosomes and whose values are filenames of
        tabixed LD bed files. The LD bed files should be formatted like this:
            chr1    14463   14464   14464:51479:0.254183
        where the first three columns indicate the zero-based coordinates of
        a SNV and the fourth column has the one-based coordinate of that
        SNV, the one-based coordinate of another SNV on the same chromosome, and
        the LD between these SNVs (all separated by colons).

    min_ld : float
        Minimum LD a partner SNV needs in order to be included. Default 0.8.

    Returns
    -------
    bt : pybedtools.BedTool
        Sorted BedTool with the input SNVs and the SNVs they are in LD with.
        The name column of every interval is the index label of the input
        SNV it was derived from.
    """
    import pybedtools as pbt
    import tabix
    out_snps = []
    for chrom, ld_bed in ld_beds.items():
        t = tabix.open(ld_bed)
        tdf = df[df['chrom'].astype(str) == chrom]
        for ind in tdf.index:
            # Label-based lookup; .loc replaces the removed DataFrame.ix.
            p = tdf.loc[ind, 'end']
            out_snps.append('{}\t{}\t{}\t{}\n'.format(chrom, p - 1, p, ind))
            try:
                # A plain for loop works on Python 2 and 3, unlike r.next().
                for n in t.query('{}'.format(chrom), p - 1, p):
                    p1, p2, r2 = n[-1].split(':')
                    if float(r2) >= min_ld:
                        out_snps.append('{}\t{}\t{}\t{}\n'.format(
                            n[0], int(p2) - 1, int(p2), ind))
            except tabix.TabixError:
                # A failed query for this SNV just means no LD partners.
                continue
    bt = pbt.BedTool(''.join(out_snps), from_string=True)
    bt = bt.sort()
    return bt

开发者ID:cdeboever3，项目名称:cdpybio，代码行数:52，代码来源:analysis.py

示例13: get_genotypes

def get_genotypes(CpG_location, tb_file="/path/to/file/DF_meth_variants.gz",
                  n_columns=782):
    """Return the tabix records for a region as a DataFrame.

    Parameters
    ----------
    CpG_location : str
        Region string passed to ``tabix.querys``.
    tb_file : str
        Path of the tabix-indexed file to query. Defaults to the path that
        was previously hard-coded.
    n_columns : int
        Number of columns of the resulting frame (labelled
        ``0 .. n_columns - 1``). Defaults to the previously hard-coded 782.

    Returns
    -------
    pandas.DataFrame
        One row per record, holding the record's fields from the fourth
        one onwards (``record[3:]``).
    """
    import tabix
    import pandas as pd
    tb = tabix.open(tb_file)
    # Collect the rows first and build the frame once; growing a DataFrame
    # one row at a time copies it on every insertion.
    rows = [record[3:] for record in tb.querys(CpG_location)]
    # range() instead of the Python 2-only xrange().
    return pd.DataFrame(rows, columns=range(n_columns))

开发者ID:CrystalHumphries，项目名称:MethylationCorrelationBlock，代码行数:13，代码来源:try_library.py

示例14: test_same_aa_different_positions

 def test_same_aa_different_positions(self):
     """same_aa() returns no pairs for variants at different protein positions."""
     vcf_lines = make_vcf_header()
     variants = (
         (5, 'Protein_position=2'),
         (7, 'Protein_position=3'),
         (8, 'Protein_position=4'),
     )
     for position, annotation in variants:
         vcf_lines.append(make_vcf_line(pos=position, extra=annotation))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     candidate_pairs = [[('1', 7), ('1', 8)]]

     self.assertEqual(same_aa(handle, candidate_pairs), [])

开发者ID:jeremymcrae，项目名称:clinical-filter，代码行数:14，代码来源:test_multinucleotide_variants.py

示例15: test_same_aa

 def test_same_aa(self):
     """same_aa() keeps a pair of variants that share a protein position."""
     # Two variants, both annotated with protein position 1.
     vcf_lines = make_vcf_header()
     for position in (2, 4):
         vcf_lines.append(make_vcf_line(pos=position, extra='Protein_position=1'))
     self.write_vcf(vcf_lines)

     handle = tabix.open(self.path)
     candidate_pairs = [[('1', 2), ('1', 4)]]

     self.assertEqual(same_aa(handle, candidate_pairs), [[('1', 2), ('1', 4)]])