Python pysam.VariantFile方法代碼示例

本文整理匯總了Python中pysam.VariantFile方法的典型用法代碼示例。如果您正苦於以下問題：Python pysam.VariantFile方法的具體用法？Python pysam.VariantFile怎麼用？Python pysam.VariantFile使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pysam的用法示例。

在下文中一共展示了pysam.VariantFile方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: fix_vcf

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def fix_vcf(input_vcf, output_vcf):
    """Rewrite the phase-set (PS) values of a VCF with sequential identifiers.

    Every distinct PS value found in ``input_vcf`` is mapped to a replacement
    value, starting at 100 and growing by 50 for each previously unseen input
    value.  Samples that shared a PS value in the input share the same
    replacement in the output.  Each record is written exactly once to
    ``output_vcf``, which reuses the input header.

    :param input_vcf: Path of the VCF to read.
    :param output_vcf: Path of the VCF to write.
    :raises KeyError: Presumably, if a sample carries no ``PS`` field
        (lookup behaviour is pysam's -- TODO confirm).
    """
    # Maps the text form of an input PS value to its replacement.
    ps_map = {}
    next_ps = 100

    # Context managers make sure the output is flushed and both handles are
    # closed, even when a record cannot be processed.
    with pysam.VariantFile(input_vcf) as vcf_in, \
            pysam.VariantFile(output_vcf, 'w', header=vcf_in.header) as vcf_out:
        for record in vcf_in.fetch():
            for sample in record.samples:
                input_ps = str(record.samples[sample]['PS'])

                if input_ps not in ps_map:
                    ps_map[input_ps] = str(next_ps)
                    next_ps += 50

                # NOTE(review): the replacement is stored as text, as before;
                # confirm the header declares PS with a type that accepts it.
                record.samples[sample]['PS'] = ps_map[input_ps]

            # Write once per record, after all of its samples were updated.
            vcf_out.write(record)

開發者ID:kishwarshafin，項目名稱:helen，代碼行數:19，代碼來源:VCF_remove_phase.py

示例2: __init__

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def __init__(self, vcfFile, info_columns=None, info_columns_prefixes=None, sample_columns=None):
        """
        Read all records of a VCF and build one bed-like feature per record.

        :param vcfFile: VCF handed to ``pysam.VariantFile``.
        :param info_columns: stored as ``self.info_fields`` (empty list if falsy).
        :param info_columns_prefixes: stored as ``self.info_field_prefixes``
            (empty list if falsy).
        :param sample_columns: stored as ``self.sample_fields`` (empty list if falsy).
        """
        reader = pysam.VariantFile(vcfFile)

        self.info_fields = info_columns if info_columns else []
        self.info_field_prefixes = info_columns_prefixes if info_columns_prefixes else []
        self.sample_fields = sample_columns if sample_columns else []
        self.variants = []
        self.features = []  # bed-like features, each paired with its record id

        # The enumeration index doubles as the id linking a variant to its feature.
        for unique_id, variant in enumerate(reader.fetch()):
            self.variants.append((variant, unique_id))
            contig = variant.chrom
            begin = variant.pos - 1
            # TODO -- handle structural variants and deletions > 1 base
            feature = Feature(contig, begin, begin + 1, '')
            self.features.append((feature, unique_id))

開發者ID:igvteam，項目名稱:igv-reports，代碼行數:18，代碼來源:varianttable.py

示例3: _populateFromVariantFile

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def _populateFromVariantFile(self, varFile, dataUrl, indexFile):
        """
        Populates the instance variables of this VariantSet from the specified
        pysam VariantFile object.

        Every contig of the file's index that actually holds records is
        registered in ``self._chromFileMap`` as ``(dataUrl, indexFile)``.
        Afterwards the metadata, call set ids and variant annotation sets
        are updated from the file.

        :param varFile: open pysam VariantFile; it must have an index.
        :param dataUrl: location of the data file behind ``varFile``.
        :param indexFile: location of the matching index file.
        :raises exceptions.NotIndexedException: if ``varFile`` has no index.
        :raises exceptions.OverlappingVcfException: if a contig holding
            records is already registered in ``self._chromFileMap``.
        """
        if varFile.index is None:
            raise exceptions.NotIndexedException(dataUrl)
        for chrom in varFile.index:
            # Unlike Tabix indices, CSI indices include all contigs defined
            # in the BCF header.  Thus we must test each one to see if
            # records exist or else they are likely to trigger spurious
            # overlapping errors.
            chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
            if isEmptyIter(varFile.fetch(chrom)):
                # A contig without records must neither raise nor claim the
                # mapping: it would overwrite the file that really holds it.
                continue
            if chrom in self._chromFileMap:
                raise exceptions.OverlappingVcfException(dataUrl, chrom)
            self._chromFileMap[chrom] = dataUrl, indexFile
        self._updateMetadata(varFile)
        self._updateCallSetIds(varFile)
        self._updateVariantAnnotationSets(varFile, dataUrl)

開發者ID:ga4gh，項目名稱:ga4gh-server，代碼行數:22，代碼來源:variants.py

示例4: match_database

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def match_database(args):
    """Match a sample's variants against an allele database and write them out.

    Opens the reference (``args.reference``), the database (``args.database``)
    and the sample (``args.sample``), expanding ``~`` in each path.  For every
    match produced by ``generate_matches`` the loci involved are passed to
    ``annotate_info`` and ``annotate_format``; the records of each superlocus
    are then written to ``args.output`` in ``record_order_key`` order, using
    the sample header.
    """
    reference = Fastafile(expanduser(args.reference))
    database = VariantFile(expanduser(args.database))
    query = VariantFile(expanduser(args.sample))

    format_meta, info_meta = build_new_metadata(database, query)

    with VariantFile(args.output, 'w', header=query.header) as writer:
        for superlocus, matches in generate_matches(reference, query, database, args):
            for allele_locus, allele, match in matches:
                # The match status becomes the suffix passed to the annotators.
                status, times = translate_match(match)
                suffix = '_' + status

                for locus in allele_locus:
                    annotate_info(locus, allele, info_meta, suffix, times)
                    annotate_format(locus, allele, format_meta, suffix, times)

            ordered = sorted(superlocus, key=NormalizedLocus.record_order_key)
            for locus in ordered:
                writer.write(locus.record)

開發者ID:bioinformed，項目名稱:vgraph，代碼行數:23，代碼來源:dbmatch.py

示例5: fetch

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def fetch(self, chrm, pos_start, pos_end, return_samples=False):
    """Fetch the records of one region from the per-chromosome VCF.

    The file name is built from ``self.pop_vcf_stem`` and ``chrm``.  Sample
    columns are dropped on opening unless ``return_samples`` is true.

    Returns the result of ``VariantFile.fetch(chrm, pos_start, pos_end)``.
    """
    drop_samples = not return_samples
    path = '%s.%s.vcf.gz' % (self.pop_vcf_stem, chrm)
    reader = VariantFile(path, drop_samples=drop_samples)
    return reader.fetch(chrm, pos_start, pos_end)

開發者ID:calico，項目名稱:basenji，代碼行數:6，代碼來源:emerald.py

示例6: __init__

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def __init__(self, path):
        """Open ``path`` with pysam and keep the handle as ``self.file``."""
        handle = pysam.VariantFile(path)
        self.file = handle

開發者ID:igvteam，項目名稱:igv-reports，代碼行數:5，代碼來源:vcf.py

示例7: populateFromFile

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def populateFromFile(self, dataUrls, indexFiles):
        """
        Populate this variant set from parallel lists of data files and
        index files.

        Both lists must have the same length and order: the jth index file
        belongs to the jth data file.  Each file is opened, handed to
        ``_populateFromVariantFile`` and closed again, even if that call
        raises.
        """
        assert len(dataUrls) == len(indexFiles)
        for url, index_path in zip(dataUrls, indexFiles):
            # Leaving the with-block closes the file on success and on error.
            with pysam.VariantFile(url, index_filename=index_path) as handle:
                self._populateFromVariantFile(handle, url, index_path)

開發者ID:ga4gh，項目名稱:ga4gh-server，代碼行數:15，代碼來源:variants.py

示例8: checkConsistency

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def checkConsistency(self):
        """
        Run the consistency checks over every file of this variant set.

        Each ``(dataUrl, indexFile)`` pair stored in ``self._chromFileMap``
        is reopened.  For every indexed contig that holds records, the
        file's metadata and call set ids are checked.  The file is closed
        afterwards, even if a check raises.
        """
        # Only the mapped values are needed; the reference-name keys are not.
        for dataUrl, indexFile in self._chromFileMap.values():
            with pysam.VariantFile(dataUrl, index_filename=indexFile) as handle:
                for contig in handle.index:
                    contig, _, _ = self.sanitizeVariantFileFetch(contig)
                    if isEmptyIter(handle.fetch(contig)):
                        continue
                    self._checkMetadata(handle)
                    self._checkCallSetIds(handle)

開發者ID:ga4gh，項目名稱:ga4gh-server，代碼行數:16，代碼來源:variants.py

示例9: getNumVariants

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def getNumVariants(self):
        """
        Report the total number of variants in this VariantSet.

        Counting is not implemented: the result is always 0.
        """
        # TODO: find a way to obtain the record count of a VariantFile.
        numVariants = 0
        return numVariants

開發者ID:ga4gh，項目名稱:ga4gh-server，代碼行數:8，代碼來源:variants.py

示例10: openFile

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def openFile(self, dataUrlIndexFilePair):
        """
        Open the variant file described by a ``(dataUrl, indexFile)`` pair
        and return the resulting pysam VariantFile.
        """
        url, index_path = dataUrlIndexFilePair
        handle = pysam.VariantFile(url, index_filename=index_path)
        return handle

開發者ID:ga4gh，項目名稱:ga4gh-server，代碼行數:5，代碼來源:variants.py

示例11: make_outputs

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def make_outputs(in_vars, out1, out2):
    """Open the requested output files, one per input.

    For each of ``out1`` and ``out2`` that is truthy, the ``BD`` and ``BK``
    FORMAT definitions are added to the header of the matching entry of
    ``in_vars``, and an output VariantFile is opened with that header.

    Returns a two-element list holding the opened file, or ``None``, for
    each position.
    """
    new_formats = (
        ('BD', '1', 'String', 'Match decision for call (match: =, mismatch: X, error: N)'),
        ('BK', '1', 'String', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)'),
    )

    out_vars = []
    for position, out_name in enumerate((out1, out2)):
        if not out_name:
            out_vars.append(None)
            continue

        header = in_vars[position].header
        for definition in new_formats:
            header.formats.add(*definition)
        out_vars.append(VariantFile(out_name, 'w', header=header))

    return out_vars

開發者ID:bioinformed，項目名稱:vgraph，代碼行數:17，代碼來源:repmatch.py

示例12: normalize

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def normalize(args):
    """Write the left-normalized form of every variant in ``args.sample``.

    Loci are taken chromosome by chromosome from ``records_by_chromosome``
    and ordered with ``sort_almost_sorted`` on the left order key.  Each
    record gets the alleles, start and stop of its locus' ``left`` form and
    is written to ``args.output``.  If any allele is empty, all alleles are
    prefixed with the reference base preceding the start, and the start
    moves back by one.
    """
    reference = Fastafile(expanduser(args.reference))
    source = VariantFile(args.sample)

    with VariantFile(args.output, 'w', header=source.header) as writer:
        # One iteration per chromosome; only the first (sole) input is used.
        for _, ref, loci in records_by_chromosome(reference, [source], [None], args):
            ordered = sort_almost_sorted(loci[0], key=NormalizedLocus.left_order_key)

            for locus in ordered:
                record = locus.record
                begin = locus.left.start
                end = locus.left.stop
                alleles = locus.left.alleles

                if '' in alleles:
                    # Step back one base and pad every allele with it.
                    begin -= 1
                    pad = ref[begin:begin + 1]
                    alleles = [pad + allele for allele in alleles]

                record.alleles = alleles
                record.start = begin
                record.stop = end

                writer.write(record)

開發者ID:bioinformed，項目名稱:vgraph，代碼行數:28，代碼來源:vgraph.py

示例13: ts_to_pysam

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def ts_to_pysam(ts, *args, **kwargs):
    """
    Yields a pysam VariantFile for the specified tree sequence and arguments.

    The tree sequence is written as VCF to ``file.vcf`` inside a temporary
    directory; ``args`` and ``kwargs`` are forwarded to ``ts.write_vcf``.
    When the generator is resumed or closed, the VariantFile is closed and
    the temporary directory is removed.

    NOTE(review): this is a plain generator as shown; presumably it is
    wrapped with ``contextlib.contextmanager`` where it is defined -- confirm.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        vcf_path = os.path.join(temp_dir, "file.vcf")
        with open(vcf_path, "w") as f:
            ts.write_vcf(f, *args, **kwargs)
        # Close the reader before the directory is cleaned up, so no handle
        # to the deleted file is left open.
        with pysam.VariantFile(vcf_path) as vcf:
            yield vcf

開發者ID:tskit-dev，項目名稱:tskit，代碼行數:11，代碼來源:test_vcf.py

示例14: vcf_header_lines

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def vcf_header_lines(vcf_file_name):
    """
    Build the header lines of the genotype output VCF from the input variant-call VCF.

    The result starts with the VCF version line, a ``fileDate`` line for
    today and the source lines, including one for this genotyper.  Then
    follow all header records of the input except FORMAT records and the
    generic ``fileformat``/``source``/``fileDate`` records, and finally the
    FORMAT tags from ``vcf_get_format_tags``.

    :param vcf_file_name: Name of a variant VCF file.

    :return: A list of VCF headers as strings with newlines. Does not include the column heading line at the end
        of the headers.
    """

    # Generic keys that were already emitted explicitly above the copied records.
    handled_keys = {'fileformat', 'source', 'filedate'}

    with pysam.VariantFile(vcf_file_name) as vcf_file:
        header_list = [
            # VCF version (added if missing)
            vcf_version(vcf_file),
            # File date
            '##fileDate={}\n'.format(time.strftime("%Y%m%d")),
        ]

        # Sources of the input, followed by this genotyper
        header_list.extend(vcf_get_source_list(vcf_file))
        header_list.append('##source=SMRTSV_Genotyper_{}\n'.format(smrtsvlib.__version__))

        # Copy the remaining records; FORMAT tags are replaced below.
        header_list.extend(
            str(element)
            for element in vcf_file.header.records
            if element.type != 'FORMAT'
            and not (element.type == 'GENERIC' and element.key.lower() in handled_keys)
        )

        # FORMAT tags written by the genotyper
        header_list.extend(vcf_get_format_tags())

    return header_list

開發者ID:EichlerLab，項目名稱:smrtsv2，代碼行數:44，代碼來源:genotype.py

示例15: match_database2

# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def match_database2(args):
    """Match a genome to a database of alleles.

    Opens the reference (``args.reference``), the database (``args.database``)
    and the sample (``args.sample``), expanding ``~`` in each path.  For every
    match produced by ``generate_matches`` the id of the database variant is
    appended to the INFO field named after the match status, on each locus
    involved.  The records of each superlocus are written to ``args.output``
    in ``record_order_key`` order.  If ``args.table`` is set, one tabular row
    per match is also written to that file, or to stdout when it is ``'-'``.

    :raises ValueError: if the database or the sample file has no index.
    """
    refs   = Fastafile(expanduser(args.reference))
    db     = VariantFile(expanduser(args.database))
    sample = VariantFile(expanduser(args.sample))

    # Look the name up in the header's samples; if that lookup raises
    # TypeError, use args.name itself as the sample name.
    try:
        sample_name = sample.header.samples[args.name]
    except TypeError:
        sample_name = args.name

    if db.index is None:
        raise ValueError('database file must be indexed')
    if sample.index is None:
        raise ValueError('sample file must be indexed')

    # Open tabular output file, if requested
    tablefile = None
    table = None
    if args.table:
        tablefile = open(args.table, 'w') if args.table != '-' else sys.stdout

    try:
        if tablefile is not None:
            table = csv.writer(tablefile, delimiter='\t', lineterminator='\n')
            write_table_header(table)

        update_info_header(sample.header)

        with VariantFile(args.output, 'w', header=sample.header) as out:
            for superlocus, matches in generate_matches(refs, sample, db, args):
                clear_info_fields(superlocus)

                for allele_locus, allele, match in matches:
                    dbvar  = allele.record
                    # Fall back to a positional id when the record has none
                    var_id = dbvar.id or f'{dbvar.chrom}_{dbvar.start+1}_{dbvar.stop}_{dbvar.alts[0]}'

                    status, times = translate_match(match)

                    for locus in allele_locus:
                        info = locus.record.info
                        # Append the id once per time the allele matched
                        info[status] = info.get(status, ()) + (var_id, ) * times

                    write_table_row(table, sample_name, var_id, allele_locus, status, match)

                for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
                    out.write(locus.record)
    finally:
        # Close (and so flush) the table only if it was opened here;
        # stdout belongs to the caller and stays open.
        if tablefile is not None and tablefile is not sys.stdout:
            tablefile.close()

開發者ID:bioinformed，項目名稱:vgraph，代碼行數:45，代碼來源:dbmatch.py

注：本文中的pysam.VariantFile方法示例由純淨天空整理自GitHub/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。