本文整理汇总了Python中pysam.VariantFile方法的典型用法代码示例。如果您正苦于以下问题:Python pysam.VariantFile方法的具体用法?Python pysam.VariantFile怎么用?Python pysam.VariantFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pysam
的用法示例。
在下文中一共展示了pysam.VariantFile方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: fix_vcf
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def fix_vcf(input_vcf, output_vcf):
vcf_in = pysam.VariantFile(input_vcf)
vcf_out = pysam.VariantFile(output_vcf, 'w', header=vcf_in.header)
records = vcf_in.fetch()
PS_dictionary = defaultdict(str)
PS_value = 100
for record in records:
for sample in record.samples:
input_ps = str(record.samples[sample]['PS'])
if input_ps not in PS_dictionary:
PS_dictionary = str(PS_value)
PS_value += 50
record.samples[sample]['PS'] = str(PS_value)
vcf_out.write(record)
示例2: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def __init__(self, vcfFile, info_columns = None, info_columns_prefixes = None, sample_columns = None):
vcf = pysam.VariantFile(vcfFile)
self.info_fields = info_columns or []
self.info_field_prefixes = info_columns_prefixes or []
self.sample_fields = sample_columns or []
self.variants = []
self.features = [] #Bed-like features
for unique_id, var in enumerate(vcf.fetch()):
self.variants.append((var, unique_id))
chr = var.chrom
start = var.pos - 1
end = start + 1 #TODO -- handle structure variants and deletions > 1 base
self.features.append((Feature(chr, start, end, ''), unique_id))
示例3: _populateFromVariantFile
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def _populateFromVariantFile(self, varFile, dataUrl, indexFile):
"""
Populates the instance variables of this VariantSet from the specified
pysam VariantFile object.
"""
if varFile.index is None:
raise exceptions.NotIndexedException(dataUrl)
for chrom in varFile.index:
# Unlike Tabix indices, CSI indices include all contigs defined
# in the BCF header. Thus we must test each one to see if
# records exist or else they are likely to trigger spurious
# overlapping errors.
chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
if not isEmptyIter(varFile.fetch(chrom)):
if chrom in self._chromFileMap:
raise exceptions.OverlappingVcfException(dataUrl, chrom)
self._chromFileMap[chrom] = dataUrl, indexFile
self._updateMetadata(varFile)
self._updateCallSetIds(varFile)
self._updateVariantAnnotationSets(varFile, dataUrl)
示例4: match_database
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def match_database(args):
"""Match a genome to a database of alleles."""
refs = Fastafile(expanduser(args.reference))
db = VariantFile(expanduser(args.database))
sample = VariantFile(expanduser(args.sample))
format_meta, info_meta = build_new_metadata(db, sample)
with VariantFile(args.output, 'w', header=sample.header) as out:
for superlocus, matches in generate_matches(refs, sample, db, args):
for allele_locus, allele, match in matches:
# Annotate results of search
status, times = translate_match(match)
suffix = '_' + status
for locus in allele_locus:
annotate_info(locus, allele, info_meta, suffix, times)
annotate_format(locus, allele, format_meta, suffix, times)
for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
out.write(locus.record)
示例5: fetch
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def fetch(self, chrm, pos_start, pos_end, return_samples=False):
vcf_file = '%s.%s.vcf.gz' % (self.pop_vcf_stem, chrm)
vcf_open = VariantFile(vcf_file, drop_samples=(not return_samples))
return vcf_open.fetch(chrm, pos_start, pos_end)
示例6: __init__
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def __init__(self, path):
self.file = pysam.VariantFile(path)
示例7: populateFromFile
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def populateFromFile(self, dataUrls, indexFiles):
"""
Populates this variant set using the specified lists of data
files and indexes. These must be in the same order, such that
the jth index file corresponds to the jth data file.
"""
assert len(dataUrls) == len(indexFiles)
for dataUrl, indexFile in zip(dataUrls, indexFiles):
varFile = pysam.VariantFile(dataUrl, index_filename=indexFile)
try:
self._populateFromVariantFile(varFile, dataUrl, indexFile)
finally:
varFile.close()
示例8: checkConsistency
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def checkConsistency(self):
"""
Perform consistency check on the variant set
"""
for referenceName, (dataUrl, indexFile) in self._chromFileMap.items():
varFile = pysam.VariantFile(dataUrl, index_filename=indexFile)
try:
for chrom in varFile.index:
chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
if not isEmptyIter(varFile.fetch(chrom)):
self._checkMetadata(varFile)
self._checkCallSetIds(varFile)
finally:
varFile.close()
示例9: getNumVariants
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def getNumVariants(self):
"""
Returns the total number of variants in this VariantSet.
"""
# TODO How do we get the number of records in a VariantFile?
return 0
示例10: openFile
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def openFile(self, dataUrlIndexFilePair):
dataUrl, indexFile = dataUrlIndexFilePair
return pysam.VariantFile(dataUrl, index_filename=indexFile)
示例11: make_outputs
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def make_outputs(in_vars, out1, out2):
"""Make output files."""
out_vars = [None, None]
if out1:
in_vars[0].header.formats.add('BD', '1', 'String', 'Match decision for call (match: =, mismatch: X, error: N)')
in_vars[0].header.formats.add('BK', '1', 'String', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)')
out_vars[0] = VariantFile(out1, 'w', header=in_vars[0].header)
if out2:
in_vars[1].header.formats.add('BD', '1', 'String', 'Match decision for call (match: =, mismatch: X, error: N)')
in_vars[1].header.formats.add('BK', '1', 'String', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)')
out_vars[1] = VariantFile(out2, 'w', header=in_vars[1].header)
return out_vars
示例12: normalize
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def normalize(args):
"""Normalize variants."""
refs = Fastafile(expanduser(args.reference))
variants = VariantFile(args.sample)
with VariantFile(args.output, 'w', header=variants.header) as out:
# Create parallel locus iterator by chromosome
for _, ref, loci in records_by_chromosome(refs, [variants], [None], args):
loci = sort_almost_sorted(loci[0], key=NormalizedLocus.left_order_key)
for locus in loci:
record = locus.record
start = locus.left.start
stop = locus.left.stop
alleles = locus.left.alleles
if '' in alleles:
pad = ref[start - 1:start]
start -= 1
alleles = [pad + a for a in alleles]
record.alleles = alleles
record.start = start
record.stop = stop
out.write(record)
示例13: ts_to_pysam
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def ts_to_pysam(ts, *args, **kwargs):
"""
Returns a pysam VariantFile for the specified tree sequence and arguments.
"""
with tempfile.TemporaryDirectory() as temp_dir:
vcf_path = os.path.join(temp_dir, "file.vcf")
with open(vcf_path, "w") as f:
ts.write_vcf(f, *args, **kwargs)
yield pysam.VariantFile(vcf_path)
示例14: vcf_header_lines
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def vcf_header_lines(vcf_file_name):
"""
Get header lines for the genotype output VCF from the input variant-call VCF file.
:param vcf_file_name: Name of a variant VCF file.
:return: A list of VCF headers as strings with newlines. Does not include the column heading line at the end
of the headers.
"""
header_list = list()
with pysam.VariantFile(vcf_file_name) as vcf_file:
# Add VCF version if missing
header_list.append(vcf_version(vcf_file))
# Set file date
header_list.append('##fileDate={}\n'.format(time.strftime("%Y%m%d")))
# Set source
header_list.extend(vcf_get_source_list(vcf_file))
header_list.append('##source=SMRTSV_Genotyper_{}\n'.format(smrtsvlib.__version__))
# Get header elements excluding FORMAT tags
for header_element in vcf_file.header.records:
# Replace FORMAT tags
if header_element.type == 'FORMAT':
continue
# Source and date handled
if header_element.type == 'GENERIC' and header_element.key.lower() in {'fileformat', 'source', 'filedate'}:
continue
# Write record
header_list.append(str(header_element))
# Add FORMAT tags written by the genotyper
header_list.extend(vcf_get_format_tags())
# Return header lines
return header_list
示例15: match_database2
# 需要导入模块: import pysam [as 别名]
# 或者: from pysam import VariantFile [as 别名]
def match_database2(args):
"""Match a genome to a database of alleles."""
refs = Fastafile(expanduser(args.reference))
db = VariantFile(expanduser(args.database))
sample = VariantFile(expanduser(args.sample))
try:
sample_name = sample.header.samples[args.name]
except TypeError:
sample_name = args.name
if db.index is None:
raise ValueError('database file must be indexed')
if sample.index is None:
raise ValueError('sample file must be indexed')
# Open tabluar output file, if requested
table = None
if args.table:
tablefile = open(args.table, 'w') if args.table != '-' else sys.stdout
table = csv.writer(tablefile, delimiter='\t', lineterminator='\n')
write_table_header(table)
update_info_header(sample.header)
with VariantFile(args.output, 'w', header=sample.header) as out:
for superlocus, matches in generate_matches(refs, sample, db, args):
clear_info_fields(superlocus)
for allele_locus, allele, match in matches:
dbvar = allele.record
var_id = dbvar.id or f'{dbvar.chrom}_{dbvar.start+1}_{dbvar.stop}_{dbvar.alts[0]}'
status, times = translate_match(match)
for locus in allele_locus:
info = locus.record.info
info[status] = info.get(status, ()) + (var_id, ) * times
write_table_row(table, sample_name, var_id, allele_locus, status, match)
for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
out.write(locus.record)