当前位置: 首页>>代码示例>>Python>>正文


Python pysam.VariantFile方法代码示例

本文整理汇总了Python中pysam.VariantFile方法的典型用法代码示例。如果您正苦于以下问题：Python pysam.VariantFile方法的具体用法？Python pysam.VariantFile怎么用？Python pysam.VariantFile使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pysam的用法示例。


在下文中一共展示了pysam.VariantFile方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fix_vcf

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def fix_vcf(input_vcf, output_vcf):
    """Copy a VCF while renumbering the ``PS`` FORMAT value of every sample.

    Each distinct ``PS`` value found in the input (compared by its string
    form) is assigned a replacement identifier, handed out in order of first
    appearance as "100", "150", "200", ...  Genotypes that carried the same
    ``PS`` value in the input carry the same replacement in the output.

    :param input_vcf: Path of the VCF file to read.
    :param output_vcf: Path of the VCF file to write; it reuses the input
        header.
    """
    # Original PS value (as text) -> replacement PS value (as text).
    ps_mapping = {}
    next_ps = 100

    # Context managers guarantee both files are closed (and the output
    # flushed) even when a record cannot be processed.
    with pysam.VariantFile(input_vcf) as vcf_in, \
            pysam.VariantFile(output_vcf, 'w', header=vcf_in.header) as vcf_out:
        for record in vcf_in.fetch():
            for sample in record.samples:
                input_ps = str(record.samples[sample]['PS'])

                if input_ps not in ps_mapping:
                    # Store the replacement in the mapping rather than
                    # rebinding the mapping itself to a string.
                    ps_mapping[input_ps] = str(next_ps)
                    next_ps += 50

                record.samples[sample]['PS'] = ps_mapping[input_ps]

            # Write once per record, after all of its samples were updated,
            # so multi-sample files do not get duplicated rows.
            vcf_out.write(record)
开发者ID:kishwarshafin,项目名称:helen,代码行数:19,代码来源:VCF_remove_phase.py

示例2: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def __init__(self, vcfFile, info_columns = None, info_columns_prefixes = None, sample_columns = None):
    """Read every record of a variant file into memory.

    Each record is stored in ``self.variants`` together with a running
    integer id, and a matching one-base-wide ``Feature`` carrying the same id
    is stored in ``self.features``.

    :param vcfFile: Path handed to ``pysam.VariantFile``.
    :param info_columns: Stored as ``self.info_fields`` (empty list if falsy).
    :param info_columns_prefixes: Stored as ``self.info_field_prefixes``
        (empty list if falsy).
    :param sample_columns: Stored as ``self.sample_fields`` (empty list if
        falsy).
    """
    reader = pysam.VariantFile(vcfFile)

    self.info_fields = info_columns if info_columns else []
    self.info_field_prefixes = info_columns_prefixes if info_columns_prefixes else []
    self.sample_fields = sample_columns if sample_columns else []
    self.variants = []
    # Bed-like features, one per variant.
    self.features = []

    for record_id, variant in enumerate(reader.fetch()):
        self.variants.append((variant, record_id))
        contig = variant.chrom
        # presumably shifts pysam's 1-based ``pos`` to a 0-based start -- confirm
        first_base = variant.pos - 1
        # TODO -- handle structure variants and deletions > 1 base
        past_last_base = first_base + 1
        feature = Feature(contig, first_base, past_last_base, '')
        self.features.append((feature, record_id))
开发者ID:igvteam,项目名称:igv-reports,代码行数:18,代码来源:varianttable.py

示例3: _populateFromVariantFile

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def _populateFromVariantFile(self, varFile, dataUrl, indexFile):
        """
        Populates the instance variables of this VariantSet from the specified
        pysam VariantFile object.
        """
        if varFile.index is None:
            raise exceptions.NotIndexedException(dataUrl)
        for chrom in varFile.index:
            # Unlike Tabix indices, CSI indices include all contigs defined
            # in the BCF header.  Thus we must test each one to see if
            # records exist or else they are likely to trigger spurious
            # overlapping errors.
            chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
            if not isEmptyIter(varFile.fetch(chrom)):
                if chrom in self._chromFileMap:
                    raise exceptions.OverlappingVcfException(dataUrl, chrom)
            self._chromFileMap[chrom] = dataUrl, indexFile
        self._updateMetadata(varFile)
        self._updateCallSetIds(varFile)
        self._updateVariantAnnotationSets(varFile, dataUrl) 
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:22,代码来源:variants.py

示例4: match_database

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def match_database(args):
    """Match a genome to a database of alleles.

    Opens the reference FASTA (``args.reference``), the allele database
    (``args.database``) and the sample variants (``args.sample``), annotates
    the sample loci with the INFO/FORMAT results of each match and writes the
    records of every superlocus, sorted by record order, to ``args.output``.

    All input handles are closed when the function returns or raises.
    """
    with Fastafile(expanduser(args.reference)) as refs, \
            VariantFile(expanduser(args.database)) as db, \
            VariantFile(expanduser(args.sample)) as sample:

        format_meta, info_meta = build_new_metadata(db, sample)

        with VariantFile(args.output, 'w', header=sample.header) as out:
            for superlocus, matches in generate_matches(refs, sample, db, args):
                for allele_locus, allele, match in matches:
                    # Annotate results of search
                    status, times = translate_match(match)
                    suffix = '_' + status

                    for locus in allele_locus:
                        annotate_info(locus, allele, info_meta, suffix, times)
                        annotate_format(locus, allele, format_meta, suffix, times)

                for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
                    out.write(locus.record)
开发者ID:bioinformed,项目名称:vgraph,代码行数:23,代码来源:dbmatch.py

示例5: fetch

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def fetch(self, chrm, pos_start, pos_end, return_samples=False):
    """Return the records of one chromosome's VCF within a position range.

    The file name is built as ``<self.pop_vcf_stem>.<chrm>.vcf.gz``.  Sample
    columns are dropped when the file is opened unless ``return_samples`` is
    true.  The result is whatever ``VariantFile.fetch`` returns for
    ``(chrm, pos_start, pos_end)``.
    """
    chrom_vcf_path = f'{self.pop_vcf_stem!s}.{chrm!s}.vcf.gz'
    skip_samples = not return_samples
    reader = VariantFile(chrom_vcf_path, drop_samples=skip_samples)
    records = reader.fetch(chrm, pos_start, pos_end)
    return records
开发者ID:calico,项目名称:basenji,代码行数:6,代码来源:emerald.py

示例6: __init__

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def __init__(self, path):
    """Open the variant file at ``path`` with pysam and keep it on ``self.file``."""
    variant_file = pysam.VariantFile(path)
    self.file = variant_file
开发者ID:igvteam,项目名称:igv-reports,代码行数:5,代码来源:vcf.py

示例7: populateFromFile

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def populateFromFile(self, dataUrls, indexFiles):
    """
    Fills this variant set from parallel lists of data files and index
    files: the jth entry of indexFiles is the index of the jth entry of
    dataUrls, so both lists must have the same length.

    Every file is opened, passed to _populateFromVariantFile and closed
    again, whether or not populating succeeded.
    """
    assert len(dataUrls) == len(indexFiles)
    pairs = zip(dataUrls, indexFiles)
    for dataUrl, indexFile in pairs:
        with pysam.VariantFile(dataUrl, index_filename=indexFile) as varFile:
            self._populateFromVariantFile(varFile, dataUrl, indexFile)
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:15,代码来源:variants.py

示例8: checkConsistency

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def checkConsistency(self):
    """
    Re-opens every data file recorded in the contig-to-file map and, for
    each indexed contig that holds at least one record, runs the metadata
    and call set id checks against that file.
    """
    for dataUrl, indexFile in self._chromFileMap.values():
        with pysam.VariantFile(dataUrl, index_filename=indexFile) as varFile:
            for indexedName in varFile.index:
                chrom, _, _ = self.sanitizeVariantFileFetch(indexedName)
                if isEmptyIter(varFile.fetch(chrom)):
                    # Nothing stored for this contig; skip the checks.
                    continue
                self._checkMetadata(varFile)
                self._checkCallSetIds(varFile)
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:16,代码来源:variants.py

示例9: getNumVariants

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def getNumVariants(self):
    """
    Returns the total number of variants in this VariantSet.

    Counting is not implemented yet, so the result is always 0.
    """
    # TODO How do we get the number of records in a VariantFile?
    numVariants = 0
    return numVariants
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:8,代码来源:variants.py

示例10: openFile

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def openFile(self, dataUrlIndexFilePair):
    """
    Opens a pysam VariantFile from a (data url, index file) pair and
    returns it.
    """
    url, index = dataUrlIndexFilePair
    variantFile = pysam.VariantFile(url, index_filename=index)
    return variantFile
开发者ID:ga4gh,项目名称:ga4gh-server,代码行数:5,代码来源:variants.py

示例11: make_outputs

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def make_outputs(in_vars, out1, out2):
    """Open the requested output variant files.

    For each output name that is truthy, the ``BD`` and ``BK`` FORMAT
    definitions are added to the header of the input at the same position in
    ``in_vars`` and a writable ``VariantFile`` is opened with that header.

    Returns a two-element list holding the opened file, or ``None`` where no
    output was requested.
    """
    format_fields = (
        ('BD', 'Match decision for call (match: =, mismatch: X, error: N)'),
        ('BK', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)'),
    )
    out_vars = [None, None]

    for slot, filename in enumerate((out1, out2)):
        if not filename:
            continue
        source = in_vars[slot]
        for tag, description in format_fields:
            source.header.formats.add(tag, '1', 'String', description)
        out_vars[slot] = VariantFile(filename, 'w', header=source.header)

    return out_vars
开发者ID:bioinformed,项目名称:vgraph,代码行数:17,代码来源:repmatch.py

示例12: normalize

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def normalize(args):
    """Normalize variants.

    Reads the variants in ``args.sample``, visits their loci chromosome by
    chromosome in left-normalized order, rewrites each record's alleles,
    start and stop from the left-normalized locus, and writes the records to
    ``args.output`` using the input header.

    The reference (``args.reference``) and the input variant file are closed
    when the function returns or raises.
    """
    with Fastafile(expanduser(args.reference)) as refs, \
            VariantFile(args.sample) as variants, \
            VariantFile(args.output, 'w', header=variants.header) as out:
        # Create parallel locus iterator by chromosome
        for _, ref, loci in records_by_chromosome(refs, [variants], [None], args):
            loci = sort_almost_sorted(loci[0], key=NormalizedLocus.left_order_key)

            for locus in loci:
                record  = locus.record
                start   = locus.left.start
                stop    = locus.left.stop
                alleles = locus.left.alleles

                if '' in alleles:
                    # An empty allele is padded: prepend the reference base
                    # just before the locus to every allele and move the
                    # start back by one.
                    pad = ref[start - 1:start]
                    start -= 1
                    alleles = [pad + a for a in alleles]

                record.alleles = alleles
                record.start   = start
                record.stop    = stop

                out.write(record)
开发者ID:bioinformed,项目名称:vgraph,代码行数:28,代码来源:vgraph.py

示例13: ts_to_pysam

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def ts_to_pysam(ts, *args, **kwargs):
    """
    Yields a pysam VariantFile for the specified tree sequence and arguments.

    The tree sequence is written with ``ts.write_vcf`` (which receives the
    extra arguments) to a file inside a temporary directory; that directory
    is only cleaned up once the generator is resumed or closed.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        vcf_file_path = os.path.join(scratch_dir, "file.vcf")
        with open(vcf_file_path, "w") as vcf_stream:
            ts.write_vcf(vcf_stream, *args, **kwargs)
        variant_file = pysam.VariantFile(vcf_file_path)
        yield variant_file
开发者ID:tskit-dev,项目名称:tskit,代码行数:11,代码来源:test_vcf.py

示例14: vcf_header_lines

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def vcf_header_lines(vcf_file_name):
    """
    Build the header lines of the genotype output VCF from the headers of the input variant-call VCF.

    The result starts with the VCF version line, a ``##fileDate`` line for today, the source lines of the input
    and a ``##source`` line for this genotyper. The input's remaining header records follow, except FORMAT records
    and the generic ``fileformat``/``source``/``filedate`` records. The genotyper's own FORMAT tags come last.

    :param vcf_file_name: Name of a variant VCF file.

    :return: A list of VCF headers as strings with newlines. Does not include the column heading line at the end
        of the headers.
    """

    # Generic header keys that are emitted explicitly and must not be copied again
    handled_generic_keys = {'fileformat', 'source', 'filedate'}

    def is_copied(header_element):
        """Tell whether an input header record is carried over unchanged."""
        element_type = header_element.type

        # FORMAT tags are replaced by the genotyper's own
        if element_type == 'FORMAT':
            return False

        # Source and date handled
        if element_type == 'GENERIC' and header_element.key.lower() in handled_generic_keys:
            return False

        return True

    header_list = []

    with pysam.VariantFile(vcf_file_name) as vcf_file:
        # VCF version first
        header_list.append(vcf_version(vcf_file))

        # File date
        header_list.append('##fileDate={}\n'.format(time.strftime("%Y%m%d")))

        # Sources of the input, then this genotyper
        header_list.extend(vcf_get_source_list(vcf_file))
        header_list.append('##source=SMRTSV_Genotyper_{}\n'.format(smrtsvlib.__version__))

        # Remaining header records of the input
        header_list.extend(
            str(header_element)
            for header_element in vcf_file.header.records
            if is_copied(header_element)
        )

        # FORMAT tags written by the genotyper
        header_list.extend(vcf_get_format_tags())

    return header_list
开发者ID:EichlerLab,项目名称:smrtsv2,代码行数:44,代码来源:genotype.py

示例15: match_database2

# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def match_database2(args):
    """Match a genome to a database of alleles.

    Opens the reference FASTA (``args.reference``), the allele database
    (``args.database``) and the sample variants (``args.sample``); both
    variant files must be indexed.  For every match, the identifier of the
    database variant is appended to the INFO field named after the match
    status on each matching sample locus, and the records of every
    superlocus are written, sorted by record order, to ``args.output``.

    When ``args.table`` is set, a tab-separated table of the matches is
    written as well, to standard output if the value is ``'-'`` and to that
    path otherwise.

    Every file opened here is closed when the function returns or raises;
    standard output is left open.

    :raises ValueError: if the database or the sample file has no index.
    """
    # Function-scope import keeps this block self-contained.
    import contextlib

    with contextlib.ExitStack() as stack:
        refs   = stack.enter_context(Fastafile(expanduser(args.reference)))
        db     = stack.enter_context(VariantFile(expanduser(args.database)))
        sample = stack.enter_context(VariantFile(expanduser(args.sample)))

        # args.name may be an index into the header's samples or already a
        # sample name; the latter raises TypeError on lookup.
        try:
            sample_name = sample.header.samples[args.name]
        except TypeError:
            sample_name = args.name

        if db.index is None:
            raise ValueError('database file must be indexed')
        if sample.index is None:
            raise ValueError('sample file must be indexed')

        # Open tabular output file, if requested
        table = None
        if args.table:
            if args.table != '-':
                # Owned by the stack so it is flushed and closed on exit.
                tablefile = stack.enter_context(open(args.table, 'w'))
            else:
                tablefile = sys.stdout
            table = csv.writer(tablefile, delimiter='\t', lineterminator='\n')
            write_table_header(table)

        update_info_header(sample.header)

        with VariantFile(args.output, 'w', header=sample.header) as out:
            for superlocus, matches in generate_matches(refs, sample, db, args):
                clear_info_fields(superlocus)

                for allele_locus, allele, match in matches:
                    dbvar  = allele.record
                    # Fall back to a positional identifier when the database
                    # record has no id.
                    var_id = dbvar.id or f'{dbvar.chrom}_{dbvar.start+1}_{dbvar.stop}_{dbvar.alts[0]}'

                    status, times = translate_match(match)

                    for locus in allele_locus:
                        info = locus.record.info
                        info[status] = info.get(status, ()) + (var_id, ) * times

                    write_table_row(table, sample_name, var_id, allele_locus, status, match)

                for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
                    out.write(locus.record)
开发者ID:bioinformed,项目名称:vgraph,代码行数:45,代码来源:dbmatch.py


注:本文中的pysam.VariantFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。