当前位置: 首页>>代码示例>>Python>>正文


Python pysam.asVCF函数代码示例

本文整理汇总了Python中pysam.asVCF函数的典型用法代码示例。如果您正苦于以下问题:Python asVCF函数的具体用法?Python asVCF怎么用?Python asVCF使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了asVCF函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testFromTabix

        def testFromTabix(self):

            # use ascii encoding - should raise error
            t = pysam.TabixFile(self.tmpfilename + ".gz", encoding="ascii")
            results = list(t.fetch(parser=pysam.asVCF()))
            self.assertRaises(UnicodeDecodeError, getattr, results[1], "id")

            t = pysam.TabixFile(self.tmpfilename + ".gz", encoding="utf-8")
            results = list(t.fetch(parser=pysam.asVCF()))
            self.assertEqual(getattr(results[1], "id"), u"Rene\xe9")
开发者ID:humburg,项目名称:pysam,代码行数:10,代码来源:tabix_test.py

示例2: fetch

 def fetch(self, chrom, start, end):
     """ yield tuples (vcf object + info dict key->val) for a range 
     vcf row attributes are: 
     contig
     pos chromosomal position, zero-based
     id
     ref reference
     alt alt
     qual qual
     filter filter
     info info
     format format specifier.
     """
     chrom = chrom.replace("chr","")
     #fname = self.fnameDict[chrom]
     #vcf = pysam.VCF()
     #vcf.connect(fname)
     tbi = self.fhDict[chrom]
     it = tbi.fetch(chrom, start, end, parser=pysam.asVCF())
     for row in it:
         infoDict = {}
         infoStr = row.info
         for keyVal in infoStr.split(";"):
             if "=" not in keyVal:
                 continue
             key, val = keyVal.split("=")
             infoDict[key] = val
         yield row, infoDict
开发者ID:Moxikai,项目名称:pubMunch,代码行数:28,代码来源:pubVcf.py

示例3: testRead

    def testRead(self):

        ncolumns = len(self.columns)

        for x, r in enumerate(self.tabix.fetch(parser=pysam.asVCF())):
            c = self.compare[x]
            for y, field in enumerate(self.columns):
                # it is ok to have a missing format column
                if y == 8 and y == len(c):
                    continue
                if field == "pos":
                    self.assertEqual(int(c[y]) - 1, getattr(r, field))
                    self.assertEqual(int(c[y]) - 1, r.pos)
                else:
                    self.assertEqual(c[y], getattr(r, field),
                                     "mismatch in field %s: %s != %s" %
                                     (field, c[y], getattr(r, field)))
            if len(c) == 8:
                self.assertEqual(0, len(r))
            else:
                self.assertEqual(len(c), len(r) + ncolumns)

            for y in range(len(c) - ncolumns):
                self.assertEqual(c[ncolumns + y], r[y])
            self.assertEqual("\t".join(map(str, c)),
                             str(r))
开发者ID:zengfengbo,项目名称:pysam,代码行数:26,代码来源:tabix_test.py

示例4: filter_vcfs

def filter_vcfs(genotype, contig, start, end):
    if contig in genotype.contigs:
        parser = pysam.asVCF()
        # This raises a ValueError if the VCF does not
        # contain any entries for the specified contig.
        for vcf in genotype.fetch(contig, start, end, parser=parser):
            if vcf.filter in ("PASS", "."):
                yield vcf
开发者ID:health1987,项目名称:paleomix,代码行数:8,代码来源:vcf_to_fasta.py

示例5: fetch

    def fetch(self,
              reference = None,
              start = None, 
              end = None, 
              region = None ):
        """ Parse a stream of VCF-formatted lines.  Initializes class instance and return generator """

        iter = self.tabixfile.fetch( reference, start, end, region, parser = pysam.asVCF() )
        for x in iter:
            yield VCFRecord( x, self )
开发者ID:pkaleta,项目名称:pysam,代码行数:10,代码来源:VCF.py

示例6: check_nth_sample

def check_nth_sample(options, genotype):
    parser = pysam.asVCF()

    for contig in genotype.contigs:
        for record in text.parse_lines(genotype.fetch(contig), parser):
            if len(record) <= options.nth_sample:
                sys.stderr.write("ERROR: Sample %i selected with --nth-sample,"
                                 " but file only contains %i sample(s)!\n"
                                 % (options.nth_sample + 1, len(record)))
                return False
            return True
    return True
开发者ID:MikkelSchubert,项目名称:paleomix,代码行数:12,代码来源:vcf_to_fasta.py

示例7: records

    def records(self, chromosome_num, start_pos_bp, end_pos_bp, parser=pysam.asVCF()):
        '''
            Returns an iterator for the file records.
        '''

        start_pos_bp = 1 if start_pos_bp is None else start_pos_bp
        end_pos_bp = self.clens[str(chromosome_num)] if end_pos_bp is None else end_pos_bp

        # subtract one since pysam uses incorrect 0-indexed positions
        records = self.tabix_file.fetch( str(chromosome_num), start_pos_bp+self.indexDelta, end_pos_bp+self.indexDelta, parser)

        return records
开发者ID:quank,项目名称:cms,代码行数:12,代码来源:vcf_reader.py

示例8: _read_files

def _read_files(filenames, args):
    in_header = True
    has_filters = False
    vcf_parser = pysam.asVCF()
    for line in fileinput.input(filenames):
        if not line.startswith("#"):
            in_header = False
            line = line.rstrip("\n\r")
            yield vcf_parser(line, len(line))
        elif in_header:
            if not (line.startswith("##") or has_filters):
                has_filters = True
                for item in sorted(vcffilter.describe_filters(args).items()):
                    print('##FILTER=<ID=%s,Description="%s">' % item)

            print(line, end="")
开发者ID:CarlesV,项目名称:paleomix,代码行数:16,代码来源:vcf_filter.py

示例9: __init__

	def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
		TabixReader.__init__(self, inFile, parser=parser)
		assert ploidy in (1,2)
		self.ploidy = ploidy
		self.clens = []
		self.sample_names = None
		for line in self.header:
			if line.startswith('##contig=<ID=') and line.endswith('>'):
				line = line[13:-1]
				c = line.split(',')[0]
				clen = int(line.split('=')[1])
				self.clens.append((c,clen))
			elif line.startswith('#CHROM'):
				row = line.split('\t')
				self.sample_names = row[9:]
		self.clens = dict(self.clens)
		assert self.sample_names
开发者ID:elimoss,项目名称:broad_malaria,代码行数:17,代码来源:util_vcf.py

示例10: testRead

    def testRead( self ):
        
        ncolumns = len(self.columns) 

        for x, r in enumerate(self.tabix.fetch( parser = pysam.asVCF() )):
            c = self.compare[x]
            for y, field in enumerate( self.columns ):
                if field == "pos":
                    self.assertEqual( int(c[y]) - 1, getattr( r, field ) )
                    self.assertEqual( int(c[y]) - 1, r.pos )
                else:
                    self.assertEqual( c[y], getattr( r, field ), 
                                      "mismatch in field %s: %s != %s" %\
                                          ( field,c[y], getattr( r, field ) ) )
            self.assertEqual( len(c), len( r ) + ncolumns )
            
            for y in range(len(c) - ncolumns):
                self.assertEqual( c[ncolumns+y], r[y] )
开发者ID:RLuisier,项目名称:RSeQC,代码行数:18,代码来源:tabix_test.py

示例11: _get_hits

def _get_hits(coords, annotation, parser_type):
    """Retrieve BED information, recovering if BED annotation file does have a chromosome.
    """
    if parser_type == "bed":
        parser = pysam.asBed()
    elif parser_type == "vcf":
        parser = pysam.asVCF()
    elif parser_type == "tuple":
        parser = pysam.asTuple()
    elif parser_type is None:
        parser = None
    else:
        raise ValueError("Unexpected parser type: %s" % parser)
    chrom, start, end = coords
    try:
        hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
    # catch invalid region errors raised by ctabix
    except ValueError:
        hit_iter = []
    return hit_iter
开发者ID:egafni,项目名称:gemini,代码行数:20,代码来源:annotations.py

示例12: GetSumOfDifferencesFromTheReference

def GetSumOfDifferencesFromTheReference(vcfpath):
    from subprocess import check_call
    from utilBMF.HTSUtils import TrimExt
    import pysam
    import numpy as np
    from sys import stderr
    from itertools import chain
    cfi = chain.from_iterable
    bgvcfpath = TrimExt(vcfpath) + ".gz"
    check_call("bgzip -c %s > %s" % (vcfpath, bgvcfpath), shell=True)
    stderr.write("bgvcf now at %s" % bgvcfpath)
    tabixstr = "tabix " + bgvcfpath
    stderr.write("Now calling tabixstr: '%s'" % tabixstr)
    check_call("tabix %s" % bgvcfpath, shell=True)
    infh = open(bgvcfpath, "rb")
    tabixhandle = pysam.tabix_iterator(infh, pysam.asVCF())
    return np.sum(np.array(list(cfi([dict(tup.split("=") for
                                          tup in i.info.split(";"))[
        'I16'].split(",")[2:4] for i in tabixhandle if
                                     "INDEL" not in i.info])), dtype=np.int64))
开发者ID:NoSeatbelts,项目名称:cybmf,代码行数:20,代码来源:pileup.py

示例13: testWrite

    def testWrite(self):

        ncolumns = len(self.columns)

        for x, r in enumerate(self.tabix.fetch(parser=pysam.asVCF())):
            c = self.compare[x]
            # check unmodified string
            cmp_string = str(r)
            ref_string = "\t".join([x for x in c])

            self.assertEqual(ref_string, cmp_string)

            # set fields and compare field-wise
            for y, field in enumerate(self.columns):
                # it is ok to have a missing format column
                if y == 8 and y == len(c):
                    continue
                if field == "pos":
                    rpos = getattr(r, field)
                    self.assertEqual(int(c[y]) - 1, rpos)
                    self.assertEqual(int(c[y]) - 1, r.pos)
                    # increment pos by 1
                    setattr(r, field, rpos + 1)
                    self.assertEqual(getattr(r, field), rpos + 1)
                    c[y] = str(int(c[y]) + 1)
                else:
                    setattr(r, field, "test_%i" % y)
                    c[y] = "test_%i" % y
                    self.assertEqual(c[y], getattr(r, field),
                                     "mismatch in field %s: %s != %s" %
                                     (field, c[y], getattr(r, field)))

            if len(c) == 8:
                self.assertEqual(0, len(r))
            else:
                self.assertEqual(len(c), len(r) + ncolumns)

            for y in range(len(c) - ncolumns):
                c[ncolumns + y] = "test_%i" % y
                r[y] = "test_%i" % y
                self.assertEqual(c[ncolumns + y], r[y])
开发者ID:humburg,项目名称:pysam,代码行数:41,代码来源:tabix_test.py

示例14: __init__

 def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
     # using old-style class superclass calling here
     # since TabixReader is derived from pysam.TabixFile
     # which is itself an old-style class (due to Cython version?)
     TabixReader.__init__(self, inFile, parser=parser)
     # when pysam uses new-style classes, we can replace with:
     #super(VcfReader, self).__init__(inFile, parser=parser)
     assert ploidy in (1, 2)
     self.ploidy = ploidy
     self.clens = []
     self.sample_names = None
     for line in self.header:
         line = bytes_to_string(line)
         if line.startswith('##contig=<ID=') and line.endswith('>'):
             line = line[13:-1]
             c = line.split(',')[0]
             clen = int(line.split('=')[1])
             self.clens.append((c, clen))
         elif line.startswith('#CHROM'):
             row = line.split('\t')
             self.sample_names = row[9:]
     self.clens = dict(self.clens)
     assert self.sample_names
开发者ID:a113n,项目名称:viral-ngs,代码行数:23,代码来源:vcf.py

示例15: select_vcf_records

def select_vcf_records(bed_records, vcf_records):
    """Returns an iterable of VCF records, corresponding to the contents of each
    region specified by the BED records. Records are returned at most once, even
    if covered by multiple BED records."""
    contigs    = frozenset(vcf_records.contigs)
    vcf_parser = pysam.asVCF()

    # Timer class used processing progress; meant primarily for BAM files
    progress   = timer.BAMTimer(None)

    # Cache of positions observed for this contig, to prevent returning
    # positions in overlapping regions multiple times
    contig_cache = None
    contig_cache_name = None

    for bed in sorted(bed_records):
        if bed.contig not in contigs:
            # Skip contigs for which no calls have been made (e.g. due to
            # low coverage. Otherwise Pysam raises an exception.
            continue
        elif contig_cache_name != bed.contig:
            # Reset cache per contig, to save memory
            contig_cache = set()
            contig_cache_name = bed.contig

        for record in vcf_records.fetch(bed.contig, bed.start, bed.end, parser = vcf_parser):
            progress.increment()

            if record.pos in contig_cache:
                # We've already reported this VCF record
                continue

            contig_cache.add(record.pos)
            # Skip records filtered by VCF_filter
            if record.filter in ('.', "PASS"):
                yield record
    progress.finalize()
开发者ID:MikkelSchubert,项目名称:paleomix,代码行数:37,代码来源:summarize_heterozygosity.py


注:本文中的pysam.asVCF函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。