本文整理匯總了Python中pysam.VariantFile方法的典型用法代碼示例。如果您正苦於以下問題:Python pysam.VariantFile方法的具體用法?Python pysam.VariantFile怎麽用?Python pysam.VariantFile使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類pysam
的用法示例。
在下文中一共展示了pysam.VariantFile方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: fix_vcf
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def fix_vcf(input_vcf, output_vcf):
vcf_in = pysam.VariantFile(input_vcf)
vcf_out = pysam.VariantFile(output_vcf, 'w', header=vcf_in.header)
records = vcf_in.fetch()
PS_dictionary = defaultdict(str)
PS_value = 100
for record in records:
for sample in record.samples:
input_ps = str(record.samples[sample]['PS'])
if input_ps not in PS_dictionary:
PS_dictionary = str(PS_value)
PS_value += 50
record.samples[sample]['PS'] = str(PS_value)
vcf_out.write(record)
示例2: __init__
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def __init__(self, vcfFile, info_columns = None, info_columns_prefixes = None, sample_columns = None):
vcf = pysam.VariantFile(vcfFile)
self.info_fields = info_columns or []
self.info_field_prefixes = info_columns_prefixes or []
self.sample_fields = sample_columns or []
self.variants = []
self.features = [] #Bed-like features
for unique_id, var in enumerate(vcf.fetch()):
self.variants.append((var, unique_id))
chr = var.chrom
start = var.pos - 1
end = start + 1 #TODO -- handle structure variants and deletions > 1 base
self.features.append((Feature(chr, start, end, ''), unique_id))
示例3: _populateFromVariantFile
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def _populateFromVariantFile(self, varFile, dataUrl, indexFile):
"""
Populates the instance variables of this VariantSet from the specified
pysam VariantFile object.
"""
if varFile.index is None:
raise exceptions.NotIndexedException(dataUrl)
for chrom in varFile.index:
# Unlike Tabix indices, CSI indices include all contigs defined
# in the BCF header. Thus we must test each one to see if
# records exist or else they are likely to trigger spurious
# overlapping errors.
chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
if not isEmptyIter(varFile.fetch(chrom)):
if chrom in self._chromFileMap:
raise exceptions.OverlappingVcfException(dataUrl, chrom)
self._chromFileMap[chrom] = dataUrl, indexFile
self._updateMetadata(varFile)
self._updateCallSetIds(varFile)
self._updateVariantAnnotationSets(varFile, dataUrl)
示例4: match_database
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def match_database(args):
"""Match a genome to a database of alleles."""
refs = Fastafile(expanduser(args.reference))
db = VariantFile(expanduser(args.database))
sample = VariantFile(expanduser(args.sample))
format_meta, info_meta = build_new_metadata(db, sample)
with VariantFile(args.output, 'w', header=sample.header) as out:
for superlocus, matches in generate_matches(refs, sample, db, args):
for allele_locus, allele, match in matches:
# Annotate results of search
status, times = translate_match(match)
suffix = '_' + status
for locus in allele_locus:
annotate_info(locus, allele, info_meta, suffix, times)
annotate_format(locus, allele, format_meta, suffix, times)
for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
out.write(locus.record)
示例5: fetch
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def fetch(self, chrm, pos_start, pos_end, return_samples=False):
vcf_file = '%s.%s.vcf.gz' % (self.pop_vcf_stem, chrm)
vcf_open = VariantFile(vcf_file, drop_samples=(not return_samples))
return vcf_open.fetch(chrm, pos_start, pos_end)
示例6: __init__
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def __init__(self, path):
self.file = pysam.VariantFile(path)
示例7: populateFromFile
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def populateFromFile(self, dataUrls, indexFiles):
"""
Populates this variant set using the specified lists of data
files and indexes. These must be in the same order, such that
the jth index file corresponds to the jth data file.
"""
assert len(dataUrls) == len(indexFiles)
for dataUrl, indexFile in zip(dataUrls, indexFiles):
varFile = pysam.VariantFile(dataUrl, index_filename=indexFile)
try:
self._populateFromVariantFile(varFile, dataUrl, indexFile)
finally:
varFile.close()
示例8: checkConsistency
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def checkConsistency(self):
"""
Perform consistency check on the variant set
"""
for referenceName, (dataUrl, indexFile) in self._chromFileMap.items():
varFile = pysam.VariantFile(dataUrl, index_filename=indexFile)
try:
for chrom in varFile.index:
chrom, _, _ = self.sanitizeVariantFileFetch(chrom)
if not isEmptyIter(varFile.fetch(chrom)):
self._checkMetadata(varFile)
self._checkCallSetIds(varFile)
finally:
varFile.close()
示例9: getNumVariants
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def getNumVariants(self):
"""
Returns the total number of variants in this VariantSet.
"""
# TODO How do we get the number of records in a VariantFile?
return 0
示例10: openFile
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def openFile(self, dataUrlIndexFilePair):
dataUrl, indexFile = dataUrlIndexFilePair
return pysam.VariantFile(dataUrl, index_filename=indexFile)
示例11: make_outputs
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def make_outputs(in_vars, out1, out2):
"""Make output files."""
out_vars = [None, None]
if out1:
in_vars[0].header.formats.add('BD', '1', 'String', 'Match decision for call (match: =, mismatch: X, error: N)')
in_vars[0].header.formats.add('BK', '1', 'String', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)')
out_vars[0] = VariantFile(out1, 'w', header=in_vars[0].header)
if out2:
in_vars[1].header.formats.add('BD', '1', 'String', 'Match decision for call (match: =, mismatch: X, error: N)')
in_vars[1].header.formats.add('BK', '1', 'String', 'Sub-type for match decision (trivial: T, haplotype: H, error: N)')
out_vars[1] = VariantFile(out2, 'w', header=in_vars[1].header)
return out_vars
示例12: normalize
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def normalize(args):
"""Normalize variants."""
refs = Fastafile(expanduser(args.reference))
variants = VariantFile(args.sample)
with VariantFile(args.output, 'w', header=variants.header) as out:
# Create parallel locus iterator by chromosome
for _, ref, loci in records_by_chromosome(refs, [variants], [None], args):
loci = sort_almost_sorted(loci[0], key=NormalizedLocus.left_order_key)
for locus in loci:
record = locus.record
start = locus.left.start
stop = locus.left.stop
alleles = locus.left.alleles
if '' in alleles:
pad = ref[start - 1:start]
start -= 1
alleles = [pad + a for a in alleles]
record.alleles = alleles
record.start = start
record.stop = stop
out.write(record)
示例13: ts_to_pysam
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def ts_to_pysam(ts, *args, **kwargs):
"""
Returns a pysam VariantFile for the specified tree sequence and arguments.
"""
with tempfile.TemporaryDirectory() as temp_dir:
vcf_path = os.path.join(temp_dir, "file.vcf")
with open(vcf_path, "w") as f:
ts.write_vcf(f, *args, **kwargs)
yield pysam.VariantFile(vcf_path)
示例14: vcf_header_lines
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def vcf_header_lines(vcf_file_name):
"""
Get header lines for the genotype output VCF from the input variant-call VCF file.
:param vcf_file_name: Name of a variant VCF file.
:return: A list of VCF headers as strings with newlines. Does not include the column heading line at the end
of the headers.
"""
header_list = list()
with pysam.VariantFile(vcf_file_name) as vcf_file:
# Add VCF version if missing
header_list.append(vcf_version(vcf_file))
# Set file date
header_list.append('##fileDate={}\n'.format(time.strftime("%Y%m%d")))
# Set source
header_list.extend(vcf_get_source_list(vcf_file))
header_list.append('##source=SMRTSV_Genotyper_{}\n'.format(smrtsvlib.__version__))
# Get header elements excluding FORMAT tags
for header_element in vcf_file.header.records:
# Replace FORMAT tags
if header_element.type == 'FORMAT':
continue
# Source and date handled
if header_element.type == 'GENERIC' and header_element.key.lower() in {'fileformat', 'source', 'filedate'}:
continue
# Write record
header_list.append(str(header_element))
# Add FORMAT tags written by the genotyper
header_list.extend(vcf_get_format_tags())
# Return header lines
return header_list
示例15: match_database2
# 需要導入模塊: import pysam [as 別名]
# 或者: from pysam import VariantFile [as 別名]
def match_database2(args):
"""Match a genome to a database of alleles."""
refs = Fastafile(expanduser(args.reference))
db = VariantFile(expanduser(args.database))
sample = VariantFile(expanduser(args.sample))
try:
sample_name = sample.header.samples[args.name]
except TypeError:
sample_name = args.name
if db.index is None:
raise ValueError('database file must be indexed')
if sample.index is None:
raise ValueError('sample file must be indexed')
# Open tabluar output file, if requested
table = None
if args.table:
tablefile = open(args.table, 'w') if args.table != '-' else sys.stdout
table = csv.writer(tablefile, delimiter='\t', lineterminator='\n')
write_table_header(table)
update_info_header(sample.header)
with VariantFile(args.output, 'w', header=sample.header) as out:
for superlocus, matches in generate_matches(refs, sample, db, args):
clear_info_fields(superlocus)
for allele_locus, allele, match in matches:
dbvar = allele.record
var_id = dbvar.id or f'{dbvar.chrom}_{dbvar.start+1}_{dbvar.stop}_{dbvar.alts[0]}'
status, times = translate_match(match)
for locus in allele_locus:
info = locus.record.info
info[status] = info.get(status, ()) + (var_id, ) * times
write_table_row(table, sample_name, var_id, allele_locus, status, match)
for locus in sorted(superlocus, key=NormalizedLocus.record_order_key):
out.write(locus.record)